def get_es_context(self, **kwargs):
    """Build the Elasticsearch request arguments for the banned-domain listing.

    The query is a ``constant_score`` filter on the term ``is_banned == 1``
    and the only aggregation is a ``terms`` aggregation named ``domains`` on
    the ``domain`` field (``size`` 1000). The top-level ``size`` is 0.

    :param kwargs: accepted for interface compatibility; not read here.
    :return: dict with the keys ``index``, ``doc_type``, ``size``, ``body``
        and ``_source_include`` (presumably unpacked into a search call --
        confirm against the caller).
    """
    banned_only = {"constant_score": {"filter": {"term": {"is_banned": 1}}}}
    per_domain = {"domains": {"terms": {"field": "domain", "size": 1000}}}

    context = dict(
        index=utils.get_elasticsearch_index(),
        doc_type=utils.get_elasticsearch_type(),
        size=0,
        body={"query": banned_only, "aggs": per_domain},
    )
    context["_source_include"] = ["title", "url", "meta", "updated_on", "domain"]
    return context
def suggest(self, **kwargs):
    """Return a "did you mean" suggestion for the search text, if there is one.

    Sends a phrase-suggester request (field ``fancy``, ``gram_size`` 2) to
    the Tor index through ``self.es_obj``, falling back to
    ``utils.get_elasticsearch_object()`` when ``self.es_obj`` is falsy.

    :param kwargs: ``q`` holds the text to get suggestions for; it is read
        with ``kwargs.get`` so a missing ``q`` is sent as ``None``.
    :return: the ``text`` of the first suggestion option, or ``None`` when
        there are no options or the response does not have the expected
        shape (the problem is logged in that case).
    """
    es_obj = self.es_obj or utils.get_elasticsearch_object()
    payload = {
        "index": utils.get_elasticsearch_tor_index(),
        "doc_type": utils.get_elasticsearch_type(),
        "size": 0,
        "body": {
            "suggest": {
                "text": kwargs.get('q'),
                "simple-phrase": {
                    "phrase": {
                        "field": "fancy",
                        "gram_size": 2  # todo make this applicable?
                    }
                }
            }
        }
    }
    resp = es_obj.search(**payload)

    try:
        options = resp['suggest']['simple-phrase'][0]['options']
        if options:
            return options[0]['text']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Missing keys and an empty suggester list raise KeyError/IndexError,
        # so they must be caught for this best-effort lookup to degrade to
        # "no suggestion" instead of failing the whole request.
        logger.exception(e)
    return None
def get_es_context(self, **kwargs):
    """Build the Elasticsearch request arguments for the per-domain listing.

    The body carries no query, only a ``terms`` aggregation named
    ``domains`` on the ``domain`` field (``size`` 1000). The top-level
    ``size`` is 0.

    :param kwargs: accepted for interface compatibility; not read here.
    :return: dict with the keys ``index``, ``doc_type``, ``size``, ``body``
        and ``_source_include`` (presumably unpacked into a search call --
        confirm against the caller).
    """
    domain_terms = {"field": "domain", "size": 1000}
    aggregations = {"domains": {"terms": domain_terms}}

    context = dict(
        index=utils.get_elasticsearch_index(),
        doc_type=utils.get_elasticsearch_type(),
        size=0,
        body={"aggs": aggregations},
    )
    context["_source_include"] = ["title", "url", "meta", "updated_on", "domain"]
    return context
def get_es_context(self, **kwargs):
    """Build the Elasticsearch request arguments for a domain-grouped search.

    The query is a ``most_fields`` ``multi_match`` of ``q`` over the
    ``fancy`` field and its ``stemmed`` / ``shingles`` sub-fields, excluding
    every document in which ``is_fake`` or ``is_banned`` exists. Results are
    bucketed by ``domain`` (``size`` 1000), ordered by each bucket's maximum
    ``_score``; every bucket carries a single top hit sorted by
    ``authority`` (descending, missing values treated as ``0.0000000001``)
    and then by ``_score``.

    :param kwargs: must contain ``q``, the text to search for.
    :return: dict with the keys ``index``, ``doc_type``, ``body`` and
        ``size``.
    :raises KeyError: if ``q`` is not present in ``kwargs``.
    """
    query = kwargs['q']
    return {
        "index": utils.get_elasticsearch_index(),
        "doc_type": utils.get_elasticsearch_type(),
        "body": {
            "query": {
                "bool": {
                    "must": [{
                        "multi_match": {
                            "query": query,
                            "type": "most_fields",
                            "fields": ["fancy", "fancy.stemmed", "fancy.shingles"],
                            "minimum_should_match": "75%",
                            "cutoff_frequency": 0.01
                        }
                    }],
                    # One `exists` clause per field: a single dict with two
                    # "field" keys keeps only the last one, which silently
                    # dropped the is_fake exclusion.
                    "must_not": [
                        {"exists": {"field": "is_fake"}},
                        {"exists": {"field": "is_banned"}}
                    ]
                }
            },
            "aggregations": {
                "domains": {
                    "terms": {
                        "size": 1000,
                        "field": "domain",
                        "order": {"max_score": "desc"}
                    },
                    "aggregations": {
                        "score": {
                            "top_hits": {
                                "size": 1,
                                "sort": [
                                    {
                                        "authority": {
                                            "order": "desc",
                                            "missing": 0.0000000001
                                        }
                                    },
                                    {"_score": {"order": "desc"}}
                                ],
                                "_source": {
                                    "include": [
                                        "title", "url", "meta", "updated_on",
                                        "domain", "authority", "anchors"
                                    ]
                                }
                            }
                        },
                        "max_score": {"max": {"script": "_score"}}
                    }
                }
            }
        },
        "size": 0
    }
def get_es_context(self, **kwargs):
    """Build the Elasticsearch request arguments for a Tor-index search.

    The query is a ``most_fields`` ``multi_match`` of ``q`` over boosted
    title, anchor, ``fancy`` and content fields, excluding every document in
    which ``is_fake`` or ``is_banned`` exists. The body also asks for a
    phrase suggestion on the ``fancy`` field. Results are bucketed by
    ``domain`` (``size`` 1000), ordered by each bucket's maximum ``_score``;
    every bucket carries a single top hit sorted by ``authority``
    (descending, missing values treated as ``0.0000000001``) and then by
    ``_score``.

    :param kwargs: must contain ``q``, the text to search for.
    :return: dict with the keys ``index``, ``doc_type``, ``body`` and
        ``size``.
    :raises KeyError: if ``q`` is not present in ``kwargs``.
    """
    return {
        "index": utils.get_elasticsearch_tor_index(),
        "doc_type": utils.get_elasticsearch_type(),
        "body": {
            "query": {
                "bool": {
                    "must": [{
                        "multi_match": {
                            "query": kwargs['q'],
                            "type": "most_fields",
                            "fields": [
                                'title^6',
                                'anchor^6',
                                'fancy.shingles^3',
                                'fancy.stemmed^3',
                                'fancy^3',
                                'content^1',
                            ],
                            "minimum_should_match": "75%",
                            "cutoff_frequency": 0.01
                        }
                    }],
                    # One `exists` clause per field: a single dict with two
                    # "field" keys keeps only the last one, which silently
                    # dropped the is_fake exclusion.
                    "must_not": [
                        {"exists": {"field": "is_fake"}},
                        {"exists": {"field": "is_banned"}}
                    ]
                }
            },
            "suggest": {
                "text": kwargs.get('q'),
                "simple-phrase": {
                    "phrase": {
                        "field": "fancy",
                        "gram_size": 2  # todo make this applicable?
                    }
                }
            },
            "aggregations": {
                "domains": {
                    "terms": {
                        "size": 1000,
                        "field": "domain",
                        "order": {"max_score": "desc"}
                    },
                    "aggregations": {
                        "score": {
                            "top_hits": {
                                "size": 1,
                                "sort": [
                                    {
                                        "authority": {
                                            "order": "desc",
                                            "missing": 0.0000000001
                                        }
                                    },
                                    {"_score": {"order": "desc"}}
                                ],
                                "_source": {
                                    "include": [
                                        "title", "url", "meta", "updated_on",
                                        "domain", "authority", "anchors",
                                        "links"
                                    ]
                                }
                            }
                        },
                        "max_score": {"max": {"script": "_score"}},
                    }
                }
            }
        },
        "size": 0
    }
def get_es_context(self, **kwargs):
    """Build the Elasticsearch request arguments for a Tor-index search.

    The request combines three parts:

    * a ``bool`` query: a ``most_fields`` ``multi_match`` of ``q`` over
      boosted title, anchor, ``fancy`` and content fields, minus every
      document in which ``is_fake`` or ``is_banned`` exists;
    * a phrase suggestion for the same text on the ``fancy`` field;
    * a ``terms`` aggregation on ``domain`` (``size`` 1000) ordered by each
      bucket's maximum ``_score``, with a single top hit per bucket sorted
      by ``authority`` (descending, missing values treated as
      ``0.0000000001``) and then by ``_score``.

    :param kwargs: must contain ``q``, the text to search for.
    :return: dict with the keys ``index``, ``doc_type``, ``body`` and
        ``size``.
    :raises KeyError: if ``q`` is not present in ``kwargs``.
    """
    query_text = kwargs['q']

    text_match = {
        "multi_match": {
            "query": query_text,
            "type": "most_fields",
            "fields": [
                'title^6',
                'anchor^6',
                'fancy.shingles^3',
                'fancy.stemmed^3',
                'fancy^3',
                'content^1',
            ],
            "minimum_should_match": "75%",
            "cutoff_frequency": 0.01,
        }
    }
    # One `exists` clause per flag: writing both as "field" keys of a single
    # dict keeps only the last key, which silently dropped the is_fake
    # exclusion.
    flagged = [{"exists": {"field": flag}} for flag in ("is_fake", "is_banned")]

    did_you_mean = {
        "text": kwargs.get('q'),
        "simple-phrase": {
            "phrase": {
                "field": "fancy",
                "gram_size": 2,  # todo make this applicable?
            }
        },
    }

    best_hit = {
        "top_hits": {
            "size": 1,
            "sort": [
                {"authority": {"order": "desc", "missing": 0.0000000001}},
                {"_score": {"order": "desc"}},
            ],
            "_source": {
                "include": [
                    "title", "url", "meta", "updated_on",
                    "domain", "authority", "anchors", "links",
                ]
            },
        }
    }
    by_domain = {
        "terms": {
            "size": 1000,
            "field": "domain",
            "order": {"max_score": "desc"},
        },
        "aggregations": {
            "score": best_hit,
            "max_score": {"max": {"script": "_score"}},
        },
    }

    return {
        "index": utils.get_elasticsearch_tor_index(),
        "doc_type": utils.get_elasticsearch_type(),
        "body": {
            "query": {"bool": {"must": [text_match], "must_not": flagged}},
            "suggest": did_you_mean,
            "aggregations": {"domains": by_domain},
        },
        "size": 0,
    }
def get_es_context(self, **kwargs):
    """Build the Elasticsearch request arguments for a domain-grouped search.

    The ``bool`` query requires a ``most_fields`` ``multi_match`` of ``q``
    over ``fancy`` and its ``stemmed`` / ``shingles`` sub-fields, filtered
    by two ``missing`` clauses on ``is_fake`` and ``is_banned``. Results are
    bucketed by ``domain`` (``size`` 1000), ordered by each bucket's maximum
    ``_score``; every bucket carries a single top hit sorted by
    ``authority`` (descending, missing values treated as ``0.0000000001``)
    and then by ``_score``.

    :param kwargs: must contain ``q``, the text to search for.
    :return: dict with the keys ``index``, ``doc_type``, ``body`` and
        ``size``.
    :raises KeyError: if ``q`` is not present in ``kwargs``.
    """
    query = kwargs['q']

    text_match = {
        "multi_match": {
            "query": query,
            "type": "most_fields",
            "fields": ["fancy", "fancy.stemmed", "fancy.shingles"],
            "minimum_should_match": "75%",
            "cutoff_frequency": 0.01,
        }
    }
    # NOTE(review): the `missing` query was removed in Elasticsearch 5.0 --
    # confirm the target cluster version still supports it.
    unflagged = [
        {"missing": {"field": "is_fake"}},
        {"missing": {"field": "is_banned"}},
    ]

    best_hit = {
        "top_hits": {
            "size": 1,
            "sort": [
                {"authority": {"order": "desc", "missing": 0.0000000001}},
                {"_score": {"order": "desc"}},
            ],
            "_source": {
                "include": [
                    "title", "url", "meta", "updated_on",
                    "domain", "authority", "anchors",
                ]
            },
        }
    }
    by_domain = {
        "terms": {
            "size": 1000,
            "field": "domain",
            "order": {"max_score": "desc"},
        },
        "aggregations": {
            "score": best_hit,
            "max_score": {"max": {"script": "_score"}},
        },
    }

    context = dict(
        index=utils.get_elasticsearch_index(),
        doc_type=utils.get_elasticsearch_type(),
        body={
            "query": {"bool": {"must": [text_match], "filter": unflagged}},
            "aggregations": {"domains": by_domain},
        },
        size=0,
    )
    return context