예제 #1
0
 def get_event_state_tweets_count(self, index="test3", session="", words="",
                                  state="confirmed"):
     """Count tweets matching ``words`` whose session field matches ``state``.

     The query is a ``bool`` with a ``must`` clause matching ``words``
     against the ``text`` field, filtered by a nested ``bool``/``should``
     clause matching ``state`` against the field named
     ``"session_" + session``.

     Args:
         index: Index name handed to ``Es_connector``.
         session: Suffix appended to ``"session_"`` to form the field name.
         words: Text matched against the ``text`` field.
         state: Value the session field is matched against.

     Returns:
         The ``'count'`` entry of the connector's ``count`` response.
     """
     connector = Es_connector(index=index)
     text_clause = {"match": {"text": {"query": words}}}
     state_clause = {"match": {"session_" + session: state}}
     query = {
         "query": {
             "bool": {
                 "must": [text_clause],
                 "filter": {"bool": {"should": [state_clause]}},
             }
         }
     }
     return connector.count(query)['count']
예제 #2
0
 def get_words_count(self, index="test3", words=""):
     """Count documents whose ``text`` field matches ``words``.

     Sends a ``simple_query_string`` query, restricted to the ``text``
     field, to the connector's ``count`` call.

     Args:
         index: Index name handed to ``Es_connector``.
         words: Query string searched for in the ``text`` field.

     Returns:
         The ``'count'`` entry of the connector's ``count`` response.
     """
     connector = Es_connector(index=index)
     text_search = {"fields": ["text"], "query": words}
     payload = {"query": {"simple_query_string": text_search}}
     return connector.count(payload)['count']
예제 #3
0
 def get_range_count(self, index, start, end):
     """Count documents whose ``timestamp_ms`` is between ``start`` and ``end``.

     Both bounds are exclusive (``gt`` / ``lt``) and are converted with
     ``str()`` before being placed in the ``range`` query. The query is
     printed to stdout before it is sent.

     Args:
         index: Index name handed to ``Es_connector``.
         start: Lower bound for ``timestamp_ms``.
         end: Upper bound for ``timestamp_ms``.

     Returns:
         The ``'count'`` entry of the connector's ``count`` response.
     """
     connector = Es_connector(index=index)
     bounds = {"gt": str(start), "lt": str(end)}
     query = {"query": {"range": {"timestamp_ms": bounds}}}
     print(query)
     response = connector.count(query)
     return response['count']
예제 #4
0
 def get_words_tweets_count(self, index="test3", session="", words=""):
     """Count documents whose ``text`` field matches ``words``.

     Args:
         index: Index name handed to ``Es_connector``.
         session: Accepted for interface compatibility; not used in the
             query.
         words: Text matched against the ``text`` field.

     Returns:
         The ``'count'`` entry of the connector's ``count`` response.
     """
     connector = Es_connector(index=index)
     text_match = {"match": {"text": {"query": words}}}
     query = {"query": {"bool": {"must": [text_match]}}}
     outcome = connector.count(query)
     return outcome['count']
예제 #5
0
 def get_event_tweets_count(self,
                            index="test3",
                            main_term="",
                            related_terms=""):
     """Count tweets matching an event's main term or any related term.

     Builds one ``match`` clause on the ``text`` field per related term,
     plus one for the main term, and combines them in a ``bool``/``should``
     query sent to the connector's ``count`` call.

     Args:
         index: Index name handed to ``Es_connector``.
         main_term: Main event term; its clause uses a fixed boost of 2.
         related_terms: Iterable of mappings with a ``'word'`` key (the
             term) and a ``'value'`` key (used as the clause's boost).
             The default empty string produces no related-term clauses.

     Returns:
         The ``'count'`` entry of the connector's ``count`` response.
     """
     my_connector = Es_connector(index=index)
     terms = [
         {"match": {"text": {"query": t['word'], "boost": t['value']}}}
         for t in related_terms
     ]
     # The main term goes last, after all related-term clauses.
     terms.append({"match": {"text": {"query": main_term, "boost": 2}}})
     query = {"query": {"bool": {"should": terms}}}
     res = my_connector.count(query)
     return res['count']
예제 #6
0
    def get_event_clusters(self,
                           index="test3",
                           main_term="",
                           related_terms=""):
        """Return image-cluster buckets for the tweets matching an event.

        Runs a ``terms`` aggregation on the ``imagesCluster`` field over the
        tweets that match the main term or any related term. Each returned
        bucket is then annotated in place with:

        * ``'image'``: the first entry of ``data['duplicates'][key]``, where
          ``data`` is loaded from the file ``index + '.json'`` and ``key``
          is the bucket's ``'key'``;
        * ``'size'``: the count of documents whose ``imagesCluster`` equals
          the bucket key.

        Args:
            index: Index name handed to ``Es_connector``; also the base name
                of the JSON file read from the current working directory.
            main_term: Main event term; its clause uses a fixed boost of 2.
            related_terms: Iterable of mappings with a ``'word'`` key (the
                term) and a ``'value'`` key (used as the clause's boost).
                The default empty string produces no related-term clauses.

        Returns:
            The list of aggregation buckets, each extended with ``'image'``
            and ``'size'`` keys.
        """
        my_connector = Es_connector(index=index)
        terms = [
            {"match": {"text": {"query": t['word'], "boost": t['value']}}}
            for t in related_terms
        ]
        # The main term goes last, after all related-term clauses.
        terms.append({"match": {"text": {"query": main_term, "boost": 2}}})
        query = {
            # No hits are requested; only the aggregation result is used.
            "size": 0,
            "query": {
                "bool": {
                    "should": terms
                }
            },
            "aggregations": {
                "group_by_cluster": {
                    "terms": {
                        "field": "imagesCluster",
                        # NOTE(review): presumably large enough to return
                        # every cluster in one response - confirm.
                        "size": 999999
                    }
                }
            }
        }
        res = my_connector.search(query)
        clusters = res['aggregations']['group_by_cluster']['buckets']
        with open(index + '.json') as f:
            data = json.load(f)

        for cluster in clusters:
            # The size comes from a separate count over the whole cluster,
            # not only the documents that matched the event terms.
            q2 = {"query": {"term": {"imagesCluster": cluster['key']}}}
            cres = my_connector.count(q2)
            images = data['duplicates'][cluster['key']]
            cluster['image'] = images[0]
            cluster['size'] = cres['count']
        return clusters
예제 #7
0
 def get_all_count(self, index="test3"):
     """Count every document in ``index`` using a ``match_all`` query.

     Args:
         index: Index name handed to ``Es_connector``.

     Returns:
         The ``'count'`` entry of the connector's ``count`` response.
     """
     connector = Es_connector(index=index)
     everything = {"query": {"match_all": {}}}
     return connector.count(everything)['count']