Example #1
0
    def __init__(self, options):
        """Register the themes' layers and bulk-index them in Elasticsearch.

        Reads ``.build/config.yaml``, opens a database session, bootstraps
        the application from ``options.app_config``, passes every
        non-excluded theme to ``self._add_theme`` (once for the public themes,
        then once per role) and finally sends ``self.layers`` to the
        ``<index>_layers`` Elasticsearch index.

        :param options: object exposing ``app_config`` (passed to
            ``bootstrap``), ``interfaces`` (interface names to select) and
            ``recreate_index`` (forwarded to ``ensure_index``).
        """
        self.options = options
        self.imported = set()
        self.layers = []

        # NOTE(review): yaml.load without an explicit Loader can instantiate
        # arbitrary Python objects. That is only acceptable while this file
        # is trusted build output; consider yaml.safe_load.
        with open(".build/config.yaml") as f:
            settings = yaml.load(f)

        self.languages = settings["available_locale_names"]
        excluded_themes_string = settings["excluded_themes_from_search"]
        excluded_themes = set()
        if excluded_themes_string is not None:
            # Strip each entry so that "a, b" excludes "b" and not " b".
            excluded_themes = set(
                name.strip() for name in excluded_themes_string.split(","))

        # must be done only once we have loaded the project config
        from c2cgeoportal.models import DBSession, Interface, Theme, Role

        self.session = DBSession()

        self._ = {}
        self.metadata_service_url = \
            'http://shop.geoportail.lu/Portail/inspire/webservices/getMD.jsp'

        # Bootstrap a single environment and take both objects from it
        # instead of bootstrapping the application twice.
        env = bootstrap(self.options.app_config)
        registry = env['registry']
        request = env['request']

        self.es_layer_index = get_index(request) + '_layers'
        self.tdirs = registry.queryUtility(ITranslationDirectories, default=[])
        self.tsf = TranslationStringFactory('geoportailv3-client')

        self.interfaces = self.session.query(Interface).filter(
            Interface.name.in_(options.interfaces)).all()

        self.public_theme = {}
        self.public_group = {}
        for interface in self.interfaces:
            self.public_theme[interface.id] = []
            self.public_group[interface.id] = []

        for theme in self.session.query(Theme).filter_by(public=True).all():
            if theme.name not in excluded_themes:
                self._add_theme(theme)

        # The list of searchable themes does not depend on the role, so it is
        # loaded and filtered once rather than re-queried for every role.
        searchable_themes = [
            theme for theme in self.session.query(Theme).all()
            if theme.name not in excluded_themes
        ]
        for role in self.session.query(Role).all():
            for theme in searchable_themes:
                self._add_theme(theme, role)

        # One client serves both the index check and the bulk request.
        es = get_elasticsearch(request)
        ensure_index(es, self.es_layer_index, options.recreate_index)
        try:
            helpers.bulk(actions=self.layers,
                         client=es,
                         raise_on_error=True)
        except (BulkIndexError, ConnectionTimeout) as e:
            # Indexing failures are reported but do not abort construction.
            statuslog("\n %s" % e)
    def __init__(self, options):
        """Register the themes' layers and bulk-index them in Elasticsearch.

        Reads ``.build/config.yaml``, opens a database session, bootstraps
        the application from ``options.app_config``, passes every theme to
        ``self._add_theme`` (once for the public themes, then once per role)
        and finally sends ``self.layers`` to the ``<index>_layers``
        Elasticsearch index.

        :param options: object exposing ``app_config`` (passed to
            ``bootstrap``), ``interfaces`` (interface names to select) and
            ``recreate_index`` (forwarded to ``ensure_index``).
        """
        self.options = options
        self.imported = set()
        self.layers = []

        # NOTE(review): yaml.load without an explicit Loader can instantiate
        # arbitrary Python objects. That is only acceptable while this file
        # is trusted build output; consider yaml.safe_load.
        with open(".build/config.yaml") as f:
            settings = yaml.load(f)

        self.languages = settings["available_locale_names"]

        # must be done only once we have loaded the project config
        from c2cgeoportal.models import DBSession, Interface, Theme, Role

        self.session = DBSession()

        self._ = {}
        self.metadata_service_url = \
            'http://shop.geoportail.lu/Portail/inspire/webservices/getMD.jsp'

        # Bootstrap a single environment and take both objects from it
        # instead of bootstrapping the application twice.
        env = bootstrap(self.options.app_config)
        registry = env['registry']
        request = env['request']

        self.es_layer_index = get_index(request) + '_layers'
        self.tdirs = registry.queryUtility(ITranslationDirectories, default=[])
        self.tsf = TranslationStringFactory('geoportailv3-client')

        self.interfaces = self.session.query(Interface).filter(
            Interface.name.in_(options.interfaces)
        ).all()

        self.public_theme = {}
        self.public_group = {}
        for interface in self.interfaces:
            self.public_theme[interface.id] = []
            self.public_group[interface.id] = []

        for theme in self.session.query(Theme).filter_by(public=True).all():
            self._add_theme(theme)

        # The theme list does not depend on the role, so it is loaded once
        # rather than re-queried for every role.
        all_themes = self.session.query(Theme).all()
        for role in self.session.query(Role).all():
            for theme in all_themes:
                self._add_theme(theme, role)

        # One client serves both the index check and the bulk request.
        es = get_elasticsearch(request)
        ensure_index(es, self.es_layer_index, options.recreate_index)
        try:
            helpers.bulk(actions=self.layers,
                         client=es,
                         raise_on_error=True)
        except (BulkIndexError, ConnectionTimeout) as e:
            # Indexing failures are reported but do not abort construction.
            statuslog("\n %s" % e)
def main():
    """Ensure today's index exists and optionally bulk-index the cursor rows.

    Command line flags:

    * ``-r`` / ``--reset``: passed to ``ensure_index`` as its third argument.
    * ``-i`` / ``--index``: fetch rows from ``get_cursor()`` in batches of
      250, turn each into a document with ``update_document`` and send every
      batch through ``helpers.bulk``.

    Exits with status 2 when the command line cannot be parsed.
    """
    env = bootstrap('development.ini')
    request = env['request']
    try:
        opts, _args = getopt.getopt(sys.argv[1:], 'ri', ['reset', 'index'])
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(2)
    index = False
    reset = False
    for opt, _value in opts:
        if opt in ('-r', '--reset'):
            statuslog('\rResetting Index')
            reset = True
        if opt in ('-i', '--index'):
            statuslog('\rChecking Index')
            index = True

    import time
    # Resolve the client and base index name once; both were previously
    # rebuilt for every row and every batch.
    es = get_elasticsearch(request)
    base_index = get_index(request)
    index_name = base_index + '_' + time.strftime("%Y%m%d")
    ensure_index(es, index_name, reset)

    if not index:
        return

    # NOTE(review): the index ensured above carries a date suffix while the
    # documents below are addressed to the undated name -- presumably an
    # alias resolves this; confirm against ensure_index.
    statuslog("\rCreating Database Query ")
    c = get_cursor()
    counter = 1
    multiple = 250
    while True:
        results = c.fetchmany(multiple)
        if not results:
            # Cursor exhausted: stop before issuing an empty bulk request.
            statuslog("\n")
            break
        doc_list = []
        for result in results:
            doc_list.append(
                update_document(base_index, 'poi', result['id'], result))
            statuslog("\rIndexed Elements: %i" % counter)
            counter += 1
        try:
            helpers.bulk(client=es,
                         actions=doc_list,
                         chunk_size=multiple,
                         raise_on_error=True)
        except (BulkIndexError, ConnectionTimeout) as e:
            # A failed batch is reported and the remaining batches still run.
            print("\n %s" % e)
Example #4
0
def main():
    """Ensure today's index exists and optionally bulk-index the cursor rows.

    Command line flags:

    * ``-r`` / ``--reset``: passed to ``ensure_index`` as its third argument.
    * ``-i`` / ``--index``: fetch rows from ``get_cursor()`` in batches of
      250, turn each into a document with ``update_document`` and send every
      batch through ``helpers.bulk``.

    Exits with status 2 when the command line cannot be parsed.
    """
    env = bootstrap('development.ini')
    request = env['request']
    try:
        opts, _args = getopt.getopt(sys.argv[1:], 'ri', ['reset', 'index'])
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(2)
    index = False
    reset = False
    for opt, _value in opts:
        if opt in ('-r', '--reset'):
            statuslog('\rResetting Index')
            reset = True
        if opt in ('-i', '--index'):
            statuslog('\rChecking Index')
            index = True

    import time
    # Resolve the client and base index name once; both were previously
    # rebuilt for every row and every batch.
    es = get_elasticsearch(request)
    base_index = get_index(request)
    index_name = base_index + '_' + time.strftime("%Y%m%d")
    ensure_index(es, index_name, reset)

    if not index:
        return

    # NOTE(review): the index ensured above carries a date suffix while the
    # documents below are addressed to the undated name -- presumably an
    # alias resolves this; confirm against ensure_index.
    statuslog("\rCreating Database Query ")
    c = get_cursor()
    counter = 1
    multiple = 250
    while True:
        results = c.fetchmany(multiple)
        if not results:
            # Cursor exhausted: stop before issuing an empty bulk request.
            statuslog("\n")
            break
        doc_list = []
        for result in results:
            doc = update_document(base_index, 'poi', result['id'], result)
            doc_list.append(doc)
            statuslog("\rIndexed Elements: %i" % counter)
            counter += 1
        try:
            helpers.bulk(client=es,
                         actions=doc_list,
                         chunk_size=multiple,
                         raise_on_error=True)
        except (BulkIndexError, ConnectionTimeout) as e:
            # A failed batch is reported and the remaining batches still run.
            print("\n %s" % e)
Example #5
0
def main():
    """Ensure the index exists, then bulk-index every row of the cursor.

    The only command line flag is ``--recreate``, which is passed to
    ``ensure_index`` as its third argument. Rows are fetched from
    ``get_cursor()`` in batches of 250, turned into documents with
    ``update_document`` and sent through ``helpers.bulk``.

    Exits with status 2 when the command line cannot be parsed.
    """
    env = bootstrap('development.ini')
    request = env['request']
    try:
        opts, _args = getopt.getopt(sys.argv[1:], '', ['recreate'])
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(2)
    recreate = any(opt == '--recreate' for opt, _value in opts)

    # Resolve the client and index name once; both were previously rebuilt
    # for every row and every batch.
    es = get_elasticsearch(request)
    index_name = get_index(request)
    ensure_index(es, index_name, recreate)

    statuslog("\rCreating Database Query ")
    c = get_cursor()
    counter = 1
    multiple = 250
    while True:
        results = c.fetchmany(multiple)
        if not results:
            # Cursor exhausted: stop before issuing an empty bulk request.
            statuslog("\n")
            break
        doc_list = []
        for result in results:
            doc = update_document(index_name, 'poi', result['id'], result)
            doc_list.append(doc)
            statuslog("\rIndexed Elements: %i" % counter)
            counter += 1
        try:
            helpers.bulk(client=es,
                         actions=doc_list,
                         chunk_size=multiple,
                         raise_on_error=True)
        except (BulkIndexError, ConnectionTimeout) as e:
            # A failed batch is reported and the remaining batches still run.
            print("\n %s" % e)
    def cmssearch(self):
        """Search the ``cms-index`` index for pages matching ``query``.

        Request parameters:

        * ``query`` (required): text matched against ``title`` and ``text``.
        * ``language`` (default ``'fr'``): only documents whose ``language``
          term equals this value are returned.
        * ``limit`` (default ``settings['defaultlimit']`` or 30): maximum
          number of results, capped at ``settings['maxlimit']`` (default 200).

        Returns a list of dicts with the keys ``url``, ``title``, ``text``
        and ``language``, or an ``HTTPBadRequest`` when ``query`` is missing
        or ``limit`` is not a non-negative integer.
        """
        params = self.request.params
        if 'query' not in params:
            return HTTPBadRequest(detail='no query')
        query = params.get('query')
        query_language = params.get('language', 'fr')

        maxlimit = self.settings.get('maxlimit', 200)

        try:
            limit = int(params.get(
                'limit',
                self.settings.get('defaultlimit', 30)))
        except ValueError:
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit < 0:
            # A negative limit would produce a negative search size and make
            # ``features[:limit]`` drop trailing hits instead of capping.
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit > maxlimit:
            limit = maxlimit

        query_body = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [],
                            "should": [],
                            "must_not": [],
                        }
                    },
                    "must": {
                        "multi_match": {
                            "type": "best_fields",
                            "fields": [
                                "title^2",
                                "text",
                            ],
                            "fuzziness": "auto",
                            "prefix_length": 3,
                            "operator": "and",
                            "query": query
                        }
                    },
                }
            }
        }
        filters = query_body['query']['bool']['filter']['bool']

        filters['must'].append({"term": {"language": query_language}})
        es = get_elasticsearch(self.request)
        search = es.search(index='cms-index',
                           body=query_body,
                           size=limit * 4)

        features = []
        for hit in search['hits']['hits']:
            source = hit['_source']
            features.append({
                "url": source['url'],
                "title": source['title'],
                "text": source['text'],
                "language": source['language']
            })
        return features[:limit]
    def fulltextsearch(self):
        """Search the main index for ``poi`` documents matching ``query``.

        Request parameters:

        * ``query`` (required): text matched against the ``label`` fields.
        * ``limit`` (default ``settings['defaultlimit']`` or 30): maximum
          number of hits, capped at ``settings['maxlimit']`` (default 200).
        * ``layer`` (optional): comma separated ``layer_name`` values; when
          given, only documents of one of these layers are returned.

        Anonymous requests only see documents with ``public`` true; an
        authenticated user sees public documents and those whose ``role_id``
        equals the id of the user's role.

        Returns a ``FeatureCollection`` built from the hits whose ``ts``
        geometry is not ``None``, or an ``HTTPBadRequest`` when ``query`` is
        missing or ``limit`` is not a non-negative integer.
        """
        params = self.request.params
        if 'query' not in params:
            return HTTPBadRequest(detail='no query')
        query = params.get('query')

        maxlimit = self.settings.get('maxlimit', 200)

        try:
            limit = int(params.get(
                'limit',
                self.settings.get('defaultlimit', 30)))
        except ValueError:
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit < 0:
            # A negative limit would be sent as a negative search size.
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit > maxlimit:
            limit = maxlimit

        # ``get`` returns None for a missing key, so no exception handling is
        # needed (the former bare ``except: pass`` could only leave ``layer``
        # unbound).
        layer = params.get('layer')

        query_body = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [],
                            "should": [],
                            "must_not": [],
                        }
                    },
                    "minimum_should_match": 2,
                    "should": [
                        {
                            "multi_match": {
                                "type": "best_fields",
                                "fields": [
                                    "label^2",
                                    "label.ngram^2",
                                    "label.simplified^2"
                                ],
                                "operator": "and",
                                "query": query
                            }
                        },
                        {
                            "multi_match": {
                                "type": "best_fields",
                                "fields": [
                                    "label.ngram",
                                    "label.simplified"
                                ],
                                "fuzziness": 1,
                                "operator": "and",
                                "query": query
                            }
                        },
                        {
                            "term": {
                                "layer_name": {
                                    "value": "Commune", "boost": 2
                                }
                            }
                        },
                        {
                            "term": {
                                "layer_name": {
                                    "value": "Localité", "boost": 1.7
                                }
                            }
                        },
                        {
                            "term": {
                                "layer_name": {
                                    "value": "lieu_dit", "boost": 1.5
                                }
                            }
                        },
                        {
                            "wildcard": {
                                "layer_name": {
                                    "value": "editus_poi*",
                                    "boost": -1.5
                                }
                            }
                        }
                    ]
                }
            }
        }
        filters = query_body['query']['bool']['filter']['bool']

        filters['must'].append({"type": {"value": "poi"}})

        # The layer restriction and the access restriction are each wrapped
        # in their own nested bool and added to ``must`` so that they are
        # ANDed. Sharing a single ``should`` list turned them into
        # "layer OR public OR role", which let a layer filter bypass the
        # access check (and the access check defeat the layer filter).
        if layer:
            filters['must'].append({
                "bool": {
                    "should": [
                        {"term": {"layer_name": cur_layer}}
                        for cur_layer in layer.split(",")
                    ]
                }
            })

        if self.request.user is None:
            filters['must'].append({"term": {"public": True}})
        else:
            role_id = self.request.user.role.id
            filters['must'].append({
                "bool": {
                    "should": [
                        {"term": {"public": True}},
                        {"term": {"role_id": role_id}},
                    ]
                }
            })

        es = get_elasticsearch(self.request)
        search = es.search(index=get_index(self.request),
                           body=query_body,
                           size=limit)

        features = []
        for hit in search['hits']['hits']:
            source = hit['_source']
            # Prefer the document's own object id, fall back to the ES id.
            try:
                feature_id = source['object_id']
            except KeyError:
                feature_id = hit['_id']
            geometry = source['ts']
            if geometry is None:
                continue
            properties = {
                "label": source['label'],
                "layer_name": source['layer_name'],
            }
            bbox = {}
            if geometry['type'] != 'Point':
                try:
                    bbox = shape(geometry).bounds
                except Exception:
                    # Best effort: a geometry that cannot be converted keeps
                    # the empty bbox instead of failing the whole search.
                    pass
            features.append(Feature(id=feature_id,
                                    geometry=geometry,
                                    properties=properties,
                                    bbox=bbox))
        return FeatureCollection(features)
    def layersearch(self):
        """Search the ``<index>_layers`` index for layers matching ``query``.

        Request parameters:

        * ``query`` (required): text matched against the translated name,
          metadata name, keywords and description fields.
        * ``limit`` (default ``settings['defaultlimit']`` or 30): maximum
          number of results, capped at ``settings['maxlimit']`` (default 200).

        Anonymous requests are filtered on ``public`` true; for an
        authenticated user the ``public`` and ``role_id`` terms are added as
        ``should`` filters.

        Returns a list of dicts with the keys ``language``, ``name`` and
        ``layer_id`` holding at most one entry per ``layer_id``, or an
        ``HTTPBadRequest`` when ``query`` is missing or ``limit`` is not a
        non-negative integer.
        """
        params = self.request.params
        if 'query' not in params:
            return HTTPBadRequest(detail='no query')
        query = params.get('query')

        maxlimit = self.settings.get('maxlimit', 200)

        try:
            limit = int(params.get(
                'limit',
                self.settings.get('defaultlimit', 30)))
        except ValueError:
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit < 0:
            # A negative limit would produce a negative search size and make
            # ``features[:limit]`` drop trailing hits instead of capping.
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit > maxlimit:
            limit = maxlimit

        query_body = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [],
                            "should": [],
                            "must_not": [],
                        }
                    },
                    "must": {
                        "multi_match": {
                            "type": "cross_fields",
                            "fields": [
                                "name_translated.simplified^2",
                                "name_translated.ngram",
                                "metadata_name.simplified^2",
                                "metadata_name.ngram",
                                "keywords",
                                "description",
                            ],
                            "operator": "and",
                            "query": query
                        }
                    },
                }
            }
        }
        filters = query_body['query']['bool']['filter']['bool']

        filters['must'].append({"type": {"value": "layer"}})

        if self.request.user is None:
            filters['must'].append({"term": {"public": True}})
        else:
            role_id = self.request.user.role.id
            filters['should'].append({"term": {"public": True}})
            filters['should'].append({"term": {"role_id": role_id}})

        es = get_elasticsearch(self.request)
        layer_index = get_index(self.request) + '_layers'
        # Four times the limit is requested because several hits can share a
        # layer_id and are collapsed below.
        search = es.search(index=layer_index,
                           body=query_body,
                           size=limit * 4)

        features = []
        # assumes layer_id values are hashable scalars -- TODO confirm
        seen_layer_ids = set()
        for hit in search['hits']['hits']:
            source = hit['_source']
            layer_id = source['layer_id']
            if layer_id in seen_layer_ids:
                continue
            seen_layer_ids.add(layer_id)
            features.append({
                "language": source['language'],
                "name": source['name'],
                "layer_id": layer_id,
            })
        return features[:limit]
    def cmssearch(self):
        """Search the ``cms-index`` index for pages matching ``query``.

        Request parameters:

        * ``query`` (required): text matched against ``title`` and ``text``.
        * ``language`` (default ``'fr'``): only documents whose ``language``
          term equals this value are returned.
        * ``limit`` (default ``settings['defaultlimit']`` or 30): maximum
          number of results, capped at ``settings['maxlimit']`` (default 200).

        Returns a list of dicts with the keys ``url``, ``title``, ``text``
        and ``language``, or an ``HTTPBadRequest`` when ``query`` is missing
        or ``limit`` is not a non-negative integer.
        """
        params = self.request.params
        if 'query' not in params:
            return HTTPBadRequest(detail='no query')
        query = params.get('query')
        query_language = params.get('language', 'fr')

        maxlimit = self.settings.get('maxlimit', 200)

        try:
            limit = int(
                params.get('limit', self.settings.get('defaultlimit', 30)))
        except ValueError:
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit < 0:
            # A negative limit would produce a negative search size and make
            # ``features[:limit]`` drop trailing hits instead of capping.
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit > maxlimit:
            limit = maxlimit

        query_body = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [],
                            "should": [],
                            "must_not": [],
                        }
                    },
                    "must": {
                        "multi_match": {
                            "type": "best_fields",
                            "fields": [
                                "title^2",
                                "text",
                            ],
                            "fuzziness": "auto",
                            "prefix_length": 3,
                            "operator": "and",
                            "query": query
                        }
                    },
                }
            }
        }
        filters = query_body['query']['bool']['filter']['bool']

        filters['must'].append({"term": {"language": query_language}})
        es = get_elasticsearch(self.request)
        search = es.search(index='cms-index', body=query_body, size=limit * 4)

        features = []
        for hit in search['hits']['hits']:
            source = hit['_source']
            features.append({
                "url": source['url'],
                "title": source['title'],
                "text": source['text'],
                "language": source['language']
            })
        return features[:limit]
    def fulltextsearch(self):
        """Search the main index for ``poi`` documents matching ``query``.

        Request parameters:

        * ``query`` (required): text matched against the ``label`` fields.
        * ``limit`` (default ``settings['defaultlimit']`` or 30): maximum
          number of hits, capped at ``settings['maxlimit']`` (default 200).
        * ``layer`` (optional): comma separated ``layer_name`` values; when
          given, only documents of one of these layers are returned.

        Anonymous requests only see documents with ``public`` true; an
        authenticated user sees public documents and those whose ``role_id``
        equals the id of the user's role.

        Returns a ``FeatureCollection`` built from the hits whose ``ts``
        geometry is not ``None``, or an ``HTTPBadRequest`` when ``query`` is
        missing or ``limit`` is not a non-negative integer.
        """
        params = self.request.params
        if 'query' not in params:
            return HTTPBadRequest(detail='no query')
        query = params.get('query')

        maxlimit = self.settings.get('maxlimit', 200)

        try:
            limit = int(
                params.get('limit', self.settings.get('defaultlimit', 30)))
        except ValueError:
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit < 0:
            # A negative limit would be sent as a negative search size.
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit > maxlimit:
            limit = maxlimit

        # ``get`` returns None for a missing key, so no exception handling is
        # needed (the former bare ``except: pass`` could only leave ``layer``
        # unbound).
        layer = params.get('layer')

        query_body = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [],
                            "should": [],
                            "must_not": [],
                        }
                    },
                    "minimum_should_match": 2,
                    "should": [
                        {
                            "multi_match": {
                                "type": "best_fields",
                                "fields": [
                                    "label^2",
                                    "label.ngram^2",
                                    "label.simplified^2"
                                ],
                                "operator": "and",
                                "query": query
                            }
                        },
                        {
                            "multi_match": {
                                "type": "best_fields",
                                "fields": [
                                    "label.ngram",
                                    "label.simplified"
                                ],
                                "fuzziness": 1,
                                "operator": "and",
                                "query": query
                            }
                        },
                        {
                            "term": {
                                "layer_name": {
                                    "value": "Commune",
                                    "boost": 2
                                }
                            }
                        },
                        {
                            "term": {
                                "layer_name": {
                                    "value": "Localité",
                                    "boost": 1.7
                                }
                            }
                        },
                        {
                            "term": {
                                "layer_name": {
                                    "value": "lieu_dit",
                                    "boost": 1.5
                                }
                            }
                        },
                        {
                            "wildcard": {
                                "layer_name": {
                                    "value": "editus_poi*",
                                    "boost": -1.5
                                }
                            }
                        }
                    ]
                }
            }
        }
        filters = query_body['query']['bool']['filter']['bool']

        filters['must'].append({"type": {"value": "poi"}})

        # The layer restriction and the access restriction are each wrapped
        # in their own nested bool and added to ``must`` so that they are
        # ANDed. Sharing a single ``should`` list turned them into
        # "layer OR public OR role", which let a layer filter bypass the
        # access check (and the access check defeat the layer filter).
        if layer:
            filters['must'].append({
                "bool": {
                    "should": [
                        {"term": {"layer_name": cur_layer}}
                        for cur_layer in layer.split(",")
                    ]
                }
            })

        if self.request.user is None:
            filters['must'].append({"term": {"public": True}})
        else:
            role_id = self.request.user.role.id
            filters['must'].append({
                "bool": {
                    "should": [
                        {"term": {"public": True}},
                        {"term": {"role_id": role_id}},
                    ]
                }
            })

        es = get_elasticsearch(self.request)
        search = es.search(index=get_index(self.request),
                           body=query_body,
                           size=limit)

        features = []
        for hit in search['hits']['hits']:
            source = hit['_source']
            # Prefer the document's own object id, fall back to the ES id.
            try:
                feature_id = source['object_id']
            except KeyError:
                feature_id = hit['_id']
            geometry = source['ts']
            if geometry is None:
                continue
            properties = {
                "label": source['label'],
                "layer_name": source['layer_name'],
            }
            bbox = {}
            if geometry['type'] != 'Point':
                try:
                    bbox = shape(geometry).bounds
                except Exception:
                    # Best effort: a geometry that cannot be converted keeps
                    # the empty bbox instead of failing the whole search.
                    pass
            features.append(Feature(id=feature_id,
                                    geometry=geometry,
                                    properties=properties,
                                    bbox=bbox))
        return FeatureCollection(features)
    def layersearch(self):
        """Search the ``<index>_layers`` index for layers matching ``query``.

        Request parameters:

        * ``query`` (required): text matched against the translated name,
          metadata name, keywords and description fields.
        * ``limit`` (default ``settings['defaultlimit']`` or 30): maximum
          number of results, capped at ``settings['maxlimit']`` (default 200).

        Anonymous requests are filtered on ``public`` true; for an
        authenticated user the ``public`` and ``role_id`` terms are added as
        ``should`` filters.

        Returns a list of dicts with the keys ``language``, ``name`` and
        ``layer_id`` holding at most one entry per ``layer_id``, or an
        ``HTTPBadRequest`` when ``query`` is missing or ``limit`` is not a
        non-negative integer.
        """
        params = self.request.params
        if 'query' not in params:
            return HTTPBadRequest(detail='no query')
        query = params.get('query')

        maxlimit = self.settings.get('maxlimit', 200)

        try:
            limit = int(
                params.get('limit', self.settings.get('defaultlimit', 30)))
        except ValueError:
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit < 0:
            # A negative limit would produce a negative search size and make
            # ``features[:limit]`` drop trailing hits instead of capping.
            return HTTPBadRequest(detail='limit value is incorrect')
        if limit > maxlimit:
            limit = maxlimit

        query_body = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [],
                            "should": [],
                            "must_not": [],
                        }
                    },
                    "must": {
                        "multi_match": {
                            "type": "cross_fields",
                            "fields": [
                                "name_translated.simplified^2",
                                "name_translated.ngram",
                                "metadata_name.simplified^2",
                                "metadata_name.ngram",
                                "keywords",
                                "description",
                            ],
                            "operator": "and",
                            "query": query
                        }
                    },
                }
            }
        }
        filters = query_body['query']['bool']['filter']['bool']

        filters['must'].append({"type": {"value": "layer"}})

        if self.request.user is None:
            filters['must'].append({"term": {"public": True}})
        else:
            role_id = self.request.user.role.id
            filters['should'].append({"term": {"public": True}})
            filters['should'].append({"term": {"role_id": role_id}})

        es = get_elasticsearch(self.request)
        layer_index = get_index(self.request) + '_layers'
        # Four times the limit is requested because several hits can share a
        # layer_id and are collapsed below.
        search = es.search(index=layer_index, body=query_body, size=limit * 4)

        features = []
        # assumes layer_id values are hashable scalars -- TODO confirm
        seen_layer_ids = set()
        for hit in search['hits']['hits']:
            source = hit['_source']
            layer_id = source['layer_id']
            if layer_id in seen_layer_ids:
                continue
            seen_layer_ids.add(layer_id)
            features.append({
                "language": source['language'],
                "name": source['name'],
                "layer_id": layer_id,
            })
        return features[:limit]