예제 #1
0
    def __init__(self, url, index_name):
        """Connect to Elasticsearch and make sure the target index exists.

        Args:
            url: Address of the Elasticsearch node; it is passed as the
                single entry of the client's ``hosts`` list.
            index_name: Name of the index this engine reads and writes.

        When the index does not exist yet, ``self.init()`` is called to
        create it.
        """
        super().__init__()
        self.index_name = index_name
        # Hand the URL to the logger as an argument instead of pre-formatting
        # with ``%``: interpolation is deferred until the record is actually
        # emitted, and a tuple value can no longer be mistaken for several
        # format arguments.
        logger.info("Connecting to ES @ %s", url)
        self.es = elasticsearch.Elasticsearch(hosts=[url])
        self.filter = SearchFilter()

        # Name the argument explicitly, like the create/delete/open calls on
        # the same client do.
        if not self.es.indices.exists(index=self.index_name):
            self.init()
예제 #2
0
    def init(self):
        """Create the index from scratch with its settings and field mapping.

        An existing index named ``self.index_name`` is deleted first. The
        index is then created with its shard/replica/refresh/codec settings
        and the ``my_nGram`` analyzer, the mapping for the ``file`` document
        type is installed (routing is mandatory), and the index is opened.
        """
        # Fixed-width trigram analyzer used by the "name.nGram" sub-field.
        ngram_analysis = {
            "analyzer": {
                "my_nGram": {
                    "tokenizer": "my_nGram_tokenizer",
                    "filter": ["lowercase", "asciifolding"]
                }
            },
            "tokenizer": {
                "my_nGram_tokenizer": {
                    "type": "nGram", "min_gram": 3, "max_gram": 3
                }
            }
        }
        index_options = {
            "number_of_shards": 50,
            "number_of_replicas": 0,
            "refresh_interval": "30s",
            "codec": "best_compression"
        }
        creation_body = {
            "settings": {
                "index": index_options,
                "analysis": ngram_analysis
            }
        }

        # Index Mappings
        name_field = {
            "analyzer": "standard",
            "type": "text",
            "fields": {"nGram": {"type": "text", "analyzer": "my_nGram"}}
        }
        file_mapping = {
            "properties": {
                "path": {"analyzer": "standard", "type": "text"},
                "name": name_field,
                "mtime": {"type": "date", "format": "epoch_second"},
                "size": {"type": "long"},
                "website_id": {"type": "integer"},
                "ext": {"type": "keyword"},
            },
            "_routing": {"required": True}
        }

        logger.info("Elasticsearch first time setup")
        indices = self.es.indices
        target = self.index_name

        # Start from a clean slate: drop whatever index is already there.
        if indices.exists(target):
            indices.delete(index=target)
        indices.create(index=target, body=creation_body)

        indices.put_mapping(
            body=file_mapping,
            doc_type="file",
            index=target,
            include_type_name=True,
        )

        indices.open(index=target)
예제 #3
0
    def init(self):
        """Rebuild the index: recreate it, configure analysis, map fields.

        An existing index named ``self.index_name`` is deleted. A new one is
        created and closed, the n-gram tokenizer and then the analyzer that
        uses it are registered through two separate settings updates, the
        mapping for the ``file`` document type is installed (routing is
        mandatory), and finally the index is reopened.
        """
        tokenizer_settings = {
            "analysis": {
                "tokenizer": {
                    "my_nGram_tokenizer": {
                        "type": "nGram",
                        "min_gram": 3,
                        "max_gram": 3
                    }
                }
            }
        }
        analyzer_settings = {
            "analysis": {
                "analyzer": {
                    "my_nGram": {
                        "tokenizer": "my_nGram_tokenizer",
                        "filter": ["lowercase", "asciifolding"]
                    }
                }
            }
        }
        name_field = {
            "analyzer": "standard",
            "type": "text",
            "fields": {
                "nGram": {"type": "text", "analyzer": "my_nGram"}
            }
        }
        file_mapping = {
            "properties": {
                "path": {"analyzer": "standard", "type": "text"},
                "name": name_field,
                "mtime": {"type": "date", "format": "epoch_second"},
                "size": {"type": "long"},
                "website_id": {"type": "integer"},
                "ext": {"type": "keyword"},
            },
            "_routing": {"required": True}
        }

        logger.info("Elasticsearch first time setup")
        indices = self.es.indices
        target = self.index_name

        if indices.exists(target):
            indices.delete(index=target)
        indices.create(index=target)
        # The settings below are applied while the index is closed.
        indices.close(index=target)

        # Index settings: tokenizer first, then the analyzer referencing it.
        indices.put_settings(body=tokenizer_settings, index=target,
                             request_timeout=60)
        indices.put_settings(body=analyzer_settings, index=target)

        indices.put_mapping(body=file_mapping, doc_type="file", index=target,
                            request_timeout=60)

        indices.open(index=target)
예제 #4
0
    def search(self, query, page, per_page, sort_order, extensions, size_min,
               size_max, match_all, fields, date_min, date_max) -> {}:
        """Run a filtered, highlighted multi-match query against the index.

        Args:
            query: Search text; rejected if ``self.filter.should_block``
                returns a truthy value for it.
            page: Page index; multiplied by ``per_page`` to get the offset.
            per_page: Number of hits requested.
            sort_order: Key looked up in ``ElasticSearchEngine.SORT_ORDERS``;
                unknown keys fall back to an empty sort list.
            extensions: Values for a ``terms`` filter on ``ext``; skipped
                when falsy.
            size_min, size_max: Bounds for a ``range`` filter on ``size``.
                The lower bound applies only when > 0, the upper bound only
                when truthy.
            match_all: Selects the multi-match operator (see note below).
            fields: Fields handed to the ``multi_match`` query.
            date_min, date_max: Bounds for a ``range`` filter on ``mtime``,
                with the same rules as the size bounds.

        Returns:
            Whatever ``self.es.search`` returns for the request.

        Raises:
            InvalidQueryException: If the query is blocked by the filter.
        """
        if self.filter.should_block(query):
            logger.info("Search was blocked")
            raise InvalidQueryException(
                "One or more terms in your query is blocked by the search filter. "
                "This incident has been reported.")

        filters = [{"terms": {"ext": extensions}}] if extensions else []

        # Size and modification time share the same optional-bounds logic.
        for field, lower, upper in (("size", size_min, size_max),
                                    ("mtime", date_min, date_max)):
            if not (lower > 0 or upper):
                continue
            bounds = {}
            if lower > 0:
                bounds["gte"] = lower
            if upper:
                bounds["lte"] = upper
            filters.append({"range": {field: bounds}})

        sort_by = ElasticSearchEngine.SORT_ORDERS.get(sort_order, [])

        # NOTE(review): a truthy match_all yields "or", otherwise "and" --
        # confirm this matches what callers mean by the flag.
        operator = "or" if match_all else "and"

        highlight_fields = {
            name: {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]}
            for name in ("name", "name.nGram", "path")
        }

        # Keep offset + page size within 10000 (presumably Elasticsearch's
        # default result-window limit -- TODO confirm).
        offset = min(page * per_page, 10000 - per_page)

        request_body = {
            "query": {
                "bool": {
                    "must": {
                        "multi_match": {
                            "query": query,
                            "fields": fields,
                            "operator": operator
                        }
                    },
                    "filter": filters
                }
            },
            "sort": sort_by,
            "highlight": {"fields": highlight_fields},
            "size": per_page,
            "from": offset
        }

        return self.es.search(body=request_body,
                              index=self.index_name,
                              request_timeout=20)