def suggest(self, **kwargs):
    """
    Did you mean functionality.

    Runs a phrase suggester on the ``fancy`` field for the text given in
    ``kwargs['q']`` and returns the text of the first suggested option.

    :param kwargs: search parameters; only ``q`` is read here
        (``None`` is sent as the suggest text when it is absent).
    :return: text of the first suggestion, or ``None`` when there are no
        options or the response does not have the expected shape.
    """
    suggestion = None
    # Fall back to a freshly obtained client when none is set on the instance.
    es_obj = self.es_obj or utils.get_elasticsearch_object()
    payload = {
        "index": utils.get_elasticsearch_tor_index(),
        "doc_type": utils.get_elasticsearch_type(),
        # Only the "suggest" section of the response is read below.
        "size": 0,
        "body": {
            "suggest": {
                "text": kwargs.get('q'),
                "simple-phrase": {
                    "phrase": {
                        "field": "fancy",
                        "gram_size": 2  # todo make this applicable?
                    }
                }
            }
        }
    }
    resp = es_obj.search(**payload)
    try:
        suggestions = resp['suggest']['simple-phrase'][0]['options']
        if suggestions:
            suggestion = suggestions[0]['text']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Suggestions are best effort: a response without the expected
        # keys or with an empty suggester list must not break the caller.
        logger.exception(e)
    return suggestion
def __init__(self):
    """
    Set up the Elasticsearch client and the index name on the instance.

    The client is built from ``settings.ELASTICSEARCH_SERVERS`` and
    ``settings.ELASTICSEARCH_TIMEOUT`` using ``RequestsHttpConnection``;
    the index name is whatever ``utils.get_elasticsearch_tor_index()``
    returns.
    """
    # NOTE(review): super(BaseCommand, self) starts the lookup *after*
    # BaseCommand in the MRO -- confirm that skipping BaseCommand.__init__
    # is intended.
    super(BaseCommand, self).__init__()

    client_options = {
        "hosts": settings.ELASTICSEARCH_SERVERS,
        "timeout": settings.ELASTICSEARCH_TIMEOUT,
        "connection_class": RequestsHttpConnection,
    }
    self.es_obj = Elasticsearch(**client_options)
    self.es_index = utils.get_elasticsearch_tor_index()
def get_es_context(self, **kwargs):
    """
    Build the keyword arguments for the Elasticsearch search request.

    The request contains:

    * a ``multi_match`` query for ``kwargs['q']`` over the boosted
      title/anchor/fancy/content fields, excluding every document that
      has an ``is_fake`` or an ``is_banned`` field;
    * a phrase suggester on the ``fancy`` field for the same text;
    * a ``domains`` terms aggregation (up to 1000 buckets, ordered by the
      ``max_score`` sub-aggregation, descending) whose ``score``
      sub-aggregation returns one top hit per domain.

    :param kwargs: search parameters; ``q`` (the query text) is required.
    :return: dict with ``index``, ``doc_type``, ``body`` and ``size`` keys.
    :raises KeyError: if ``q`` is not supplied.
    """
    query_text = kwargs['q']
    return {
        "index": utils.get_elasticsearch_tor_index(),
        "doc_type": utils.get_elasticsearch_type(),
        "body": {
            "query": {
                "bool": {
                    "must": [{
                        "multi_match": {
                            "query": query_text,
                            "type": "most_fields",
                            "fields": [
                                'title^6',
                                'anchor^6',
                                'fancy.shingles^3',
                                'fancy.stemmed^3',
                                'fancy^3',
                                'content^1',
                            ],
                            "minimum_should_match": "75%",
                            "cutoff_frequency": 0.01
                        }
                    }],
                    # One "exists" clause per field: a dict literal keeps
                    # only the last value of a repeated key, so putting both
                    # "field" entries in one clause silently dropped the
                    # is_fake check.
                    "must_not": [
                        {"exists": {"field": "is_fake"}},
                        {"exists": {"field": "is_banned"}},
                    ]
                }
            },
            "suggest": {
                "text": query_text,
                "simple-phrase": {
                    "phrase": {
                        "field": "fancy",
                        "gram_size": 2  # todo make this applicable?
                    }
                }
            },
            "aggregations": {
                "domains": {
                    "terms": {
                        "size": 1000,
                        "field": "domain",
                        "order": {
                            "max_score": "desc"
                        }
                    },
                    "aggregations": {
                        # Single best document per domain bucket.
                        "score": {
                            "top_hits": {
                                "size": 1,
                                "sort": [{
                                    "authority": {
                                        "order": "desc",
                                        "missing": 0.0000000001
                                    }
                                }, {
                                    "_score": {
                                        "order": "desc"
                                    }
                                }],
                                "_source": {
                                    "include": [
                                        "title",
                                        "url",
                                        "meta",
                                        "updated_on",
                                        "domain",
                                        "authority",
                                        "anchors",
                                        "links"
                                    ]
                                }
                            }
                        },
                        # Referenced by the "order" of the terms aggregation.
                        "max_score": {
                            "max": {
                                "script": "_score"
                            }
                        },
                    }
                }
            }
        },
        # Results are read from the aggregation, not from top-level hits.
        "size": 0
    }
def get_es_context(self, **kwargs):
    """
    Build the keyword arguments for the Elasticsearch search request.

    The request contains:

    * a ``multi_match`` query for ``kwargs['q']`` over the boosted
      title/anchor/fancy/content fields, excluding every document that
      has an ``is_fake`` or an ``is_banned`` field;
    * a phrase suggester on the ``fancy`` field for the same text;
    * a ``domains`` terms aggregation (up to 1000 buckets, ordered by the
      ``max_score`` sub-aggregation, descending) whose ``score``
      sub-aggregation returns one top hit per domain.

    :param kwargs: search parameters; ``q`` (the query text) is required.
    :return: dict with ``index``, ``doc_type``, ``body`` and ``size`` keys.
    :raises KeyError: if ``q`` is not supplied.
    """
    query_text = kwargs['q']
    return {
        "index": utils.get_elasticsearch_tor_index(),
        "doc_type": utils.get_elasticsearch_type(),
        "body": {
            "query": {
                "bool": {
                    "must": [
                        {
                            "multi_match": {
                                "query": query_text,
                                "type": "most_fields",
                                "fields": [
                                    'title^6',
                                    'anchor^6',
                                    'fancy.shingles^3',
                                    'fancy.stemmed^3',
                                    'fancy^3',
                                    'content^1',
                                ],
                                "minimum_should_match": "75%",
                                "cutoff_frequency": 0.01
                            }
                        }
                    ],
                    # One "exists" clause per field: a dict literal keeps
                    # only the last value of a repeated key, so putting both
                    # "field" entries in one clause silently dropped the
                    # is_fake check.
                    "must_not": [
                        {"exists": {"field": "is_fake"}},
                        {"exists": {"field": "is_banned"}},
                    ]
                }
            },
            "suggest": {
                "text": query_text,
                "simple-phrase": {
                    "phrase": {
                        "field": "fancy",
                        "gram_size": 2  # todo make this applicable?
                    }
                }
            },
            "aggregations": {
                "domains": {
                    "terms": {
                        "size": 1000,
                        "field": "domain",
                        "order": {"max_score": "desc"}
                    },
                    "aggregations": {
                        # Single best document per domain bucket.
                        "score": {
                            "top_hits": {
                                "size": 1,
                                "sort": [
                                    {
                                        "authority": {
                                            "order": "desc",
                                            "missing": 0.0000000001
                                        }
                                    },
                                    {
                                        "_score": {
                                            "order": "desc"
                                        }
                                    }
                                ],
                                "_source": {
                                    "include": ["title", "url", "meta", "updated_on", "domain",
                                                "authority", "anchors", "links"]
                                }
                            }
                        },
                        # Referenced by the "order" of the terms aggregation.
                        "max_score": {
                            "max": {
                                "script": "_score"
                            }
                        },
                    }
                }
            }
        },
        # Results are read from the aggregation, not from top-level hits.
        "size": 0
    }
def __init__(self):
    """
    Set up the Elasticsearch client and the index name on the instance.

    Both values come from the project's ``utils`` helpers:
    ``get_elasticsearch_object()`` for the client and
    ``get_elasticsearch_tor_index()`` for the index name.
    """
    # NOTE(review): super(BaseCommand, self) starts the lookup *after*
    # BaseCommand in the MRO -- confirm that skipping BaseCommand.__init__
    # is intended.
    super(BaseCommand, self).__init__()

    self.es_obj = utils.get_elasticsearch_object()

    index_name = utils.get_elasticsearch_tor_index()
    self.es_index = index_name