Ejemplo n.º 1
0
 def delete_session(self, id):
     """Delete a session and the session field it left on its tweets.

     The session document is fetched by ``id`` from the sessions index
     (``self.sessions_index`` / ``self.sessions_doc_type``). If it exists,
     the field named ``'session_' + s_name`` is removed through a connector
     on the tweets index recorded in the session (``s_index`` / ``s_type``),
     and then the session document itself is deleted.

     Args:
         id: Identifier of the session document to delete.

     Returns:
         True if the session was found and deleted, False if the lookup
         returned a falsy result.
     """
     sessions = Es_connector(index=self.sessions_index,
                             doc_type=self.sessions_doc_type)
     record = sessions.get(id)

     # Guard clause: nothing to clean up when the session is unknown.
     if not record:
         return False

     print("delete Session")
     source = record['_source']

     # Step 1: strip the session's field from the tweets it annotated.
     # NOTE(review): presumably remove_field_all drops the field from every
     # document in that index -- confirm against Es_connector.
     tweets = Es_connector(index=source['s_index'],
                           doc_type=source['s_type'])
     field_name = 'session_' + source['s_name']
     print(field_name)
     tweets.remove_field_all(field_name)

     # Step 2: remove the session document itself.
     sessions.delete(id)
     return True
Ejemplo n.º 2
0
        "aggs": {
            "duplicated_by_str_id": {
                "terms": {
                    "field": "id_str.keyword",
                    "min_doc_count": 2,
                    "size": 20
                }
            }
        }
    })
    buckets_size = len(res['aggregations']['duplicated_by_str_id']['buckets'])

    # Each bucket is one id_str value that the terms aggregation reported
    # for at least two documents (min_doc_count=2).
    for bucket in res['aggregations']['duplicated_by_str_id']['buckets']:

        print("Deleting ", bucket["key"])
        duplicated_res = my_conn.search(
            {"query": {
                "match": {
                    "id_str": bucket["key"]
                }
            }})

        total_dup_files = duplicated_res["hits"]["total"]
        returned_docs = duplicated_res["hits"]["hits"]

        # Keep one copy and delete the rest. "total" counts every match,
        # but the response only carries one page of hits, so the number of
        # deletions is capped by what was actually returned; indexing up to
        # total - 1 would raise IndexError on a large bucket. Any leftover
        # duplicates still match the aggregation on a later pass.
        removable = max(min(total_dup_files - 1, len(returned_docs)), 0)
        for doc in returned_docs[:removable]:
            my_conn.delete(doc["_id"])

    time.sleep(2)  # Sleep 2 seconds to avoid errors in the next loop

print("Done!")