def get_event_state_tweets_count(self, index="test3", session="", words="", state="confirmed"):
    """Count documents whose text matches ``words`` and whose session field matches ``state``.

    Args:
        index: Name of the index handed to ``Es_connector``.
        session: Suffix appended to ``"session_"`` to form the field name
            that is compared against ``state``.
        words: Text used in the ``match`` clause on the ``text`` field.
        state: Value the ``session_<session>`` field must match.

    Returns:
        The ``'count'`` entry of the connector's count response.
    """
    # The connector is created before the query is assembled, as in the
    # original ordering of statements.
    connector = Es_connector(index=index)

    text_clause = {"match": {"text": {"query": words}}}
    state_clause = {"match": {"session_" + session: state}}
    body = {
        "query": {
            "bool": {
                "must": [text_clause],
                "filter": {"bool": {"should": [state_clause]}},
            }
        }
    }

    response = connector.count(body)
    return response['count']
def get_words_count(self, index="test3", words=""):
    """Count documents whose ``text`` field matches a simple query string.

    Args:
        index: Name of the index handed to ``Es_connector``.
        words: Query passed to ``simple_query_string`` against ``text``.

    Returns:
        The ``'count'`` entry of the connector's count response.
    """
    connector = Es_connector(index=index)
    string_query = {"fields": ["text"], "query": words}
    body = {"query": {"simple_query_string": string_query}}
    return connector.count(body)['count']
def get_range_count(self, index, start, end):
    """Count documents whose ``timestamp_ms`` lies strictly between two bounds.

    Args:
        index: Name of the index handed to ``Es_connector``.
        start: Lower bound; sent as ``str(start)`` under ``"gt"``.
        end: Upper bound; sent as ``str(end)`` under ``"lt"``.

    Returns:
        The ``'count'`` entry of the connector's count response.
    """
    my_connector = Es_connector(index=index)
    query = {
        "query": {
            "range": {
                "timestamp_ms": {
                    # Both bounds are exclusive ("gt"/"lt") and are sent
                    # as strings, exactly as before.
                    "gt": str(start),
                    "lt": str(end),
                }
            }
        }
    }
    # The leftover debug ``print(query)`` was dropped: it wrote to stdout
    # on every call, unlike the sibling count helpers.
    res = my_connector.count(query)
    return res['count']
def get_words_tweets_count(self, index="test3", session="", words=""):
    """Count documents whose ``text`` field matches ``words``.

    Args:
        index: Name of the index handed to ``Es_connector``.
        session: Accepted but not used when building the query.
        words: Text used in the ``match`` clause on the ``text`` field.

    Returns:
        The ``'count'`` entry of the connector's count response.
    """
    connector = Es_connector(index=index)
    text_clause = {"match": {"text": {"query": words}}}
    body = {"query": {"bool": {"must": [text_clause]}}}
    response = connector.count(body)
    return response['count']
def get_event_tweets_count(self, index="test3", main_term="", related_terms=""):
    """Count documents matching an event's main term or any related term.

    Args:
        index: Name of the index handed to ``Es_connector``.
        main_term: Main term; matched against ``text`` with boost 2.
        related_terms: Iterable of mappings, each providing ``'word'``
            (the text to match) and ``'value'`` (used as the boost).
            The default empty string yields no related clauses.

    Returns:
        The ``'count'`` entry of the connector's count response.
    """
    my_connector = Es_connector(index=index)

    # One boosted match clause per related term. The previous version also
    # concatenated every word into a local string that was never read;
    # that dead accumulator has been removed.
    terms = [
        {"match": {"text": {"query": t['word'], "boost": t['value']}}}
        for t in related_terms
    ]
    # The main term goes last, with a fixed boost of 2.
    terms.append({"match": {"text": {"query": main_term, "boost": 2}}})

    query = {"query": {"bool": {"should": terms}}}
    res = my_connector.count(query)
    return res['count']
def get_event_clusters(self, index="test3", main_term="", related_terms=""):
    """Return the image-cluster buckets for an event, with image and size added.

    Runs a terms aggregation on ``imagesCluster`` over the documents that
    match the main term or any related term, then enriches every bucket
    using the ``<index>.json`` file and one count request per cluster.

    Args:
        index: Name of the index handed to ``Es_connector``; also the stem
            of the JSON file (``index + '.json'``) opened relative to the
            current working directory.
        main_term: Main term; matched against ``text`` with boost 2.
        related_terms: Iterable of mappings, each providing ``'word'``
            (the text to match) and ``'value'`` (used as the boost).
            The default empty string yields no related clauses.

    Returns:
        The list found at ``aggregations.group_by_cluster.buckets`` in the
        search response. Each bucket is extended in place with:

        * ``'image'``: first entry of ``data['duplicates'][bucket['key']]``
          from the JSON file.
        * ``'size'``: count of all documents in the index whose
          ``imagesCluster`` equals the bucket key (not restricted to the
          event query).

    Raises:
        Whatever ``open``/``json.load`` raise when ``<index>.json`` is
        missing or invalid, and ``KeyError``/``IndexError`` when a cluster
        key has no (or an empty) entry under ``'duplicates'``.
    """
    my_connector = Es_connector(index=index)

    # One boosted match clause per related term, then the main term. The
    # unused string accumulator of the previous version was removed.
    terms = [
        {"match": {"text": {"query": t['word'], "boost": t['value']}}}
        for t in related_terms
    ]
    terms.append({"match": {"text": {"query": main_term, "boost": 2}}})

    query = {
        # No hits are requested; only the aggregation result is used.
        "size": 0,
        "query": {
            "bool": {
                "should": terms
            }
        },
        "aggregations": {
            "group_by_cluster": {
                "terms": {
                    "field": "imagesCluster",
                    # Large bucket limit so that clusters are not truncated.
                    "size": 999999
                }
            }
        }
    }
    res = my_connector.search(query)
    clusters = res['aggregations']['group_by_cluster']['buckets']

    with open(index + '.json') as f:
        data = json.load(f)

    for cluster in clusters:
        # NOTE(review): this issues one count request per cluster.
        size_query = {"query": {"term": {"imagesCluster": cluster['key']}}}
        cres = my_connector.count(size_query)

        images = data['duplicates'][cluster['key']]
        cluster['image'] = images[0]
        cluster['size'] = cres['count']

    return clusters
def get_all_count(self, index="test3"):
    """Count every document in the index using a ``match_all`` query.

    Args:
        index: Name of the index handed to ``Es_connector``.

    Returns:
        The ``'count'`` entry of the connector's count response.
    """
    connector = Es_connector(index=index)
    body = {"query": {"match_all": {}}}
    return connector.count(body)['count']