def delete_docs(self, website_id):
    """Delete every indexed document whose ``website_id`` term matches.

    Matching documents are streamed with ``helpers.scan`` (routed by
    ``website_id``) and handed to ``self._delete`` in batches of at most
    10000 hits. If anything raises, the error is logged and the whole
    scan-and-delete pass is restarted after a 10 second pause; the method
    only returns once a pass completes without an exception.
    """
    batch_limit = 10000
    finished = False

    while not finished:
        try:
            logger.debug("Deleting docs of " + str(website_id))

            term_query = {"query": {"term": {"website_id": website_id}}}
            hits = helpers.scan(
                query=term_query,
                scroll="1m",
                client=self.es,
                index=self.index_name,
                request_timeout=120,
                routing=website_id,
            )

            pending = []
            for hit in hits:
                pending.append(hit)
                if len(pending) >= batch_limit:
                    # Flush a full batch, then reuse the same list object.
                    self._delete(pending, website_id)
                    pending.clear()

            # Flush whatever is left over from the last partial batch.
            if pending:
                self._delete(pending, website_id)

            finished = True
        except Exception as e:
            logger.error("During delete: " + str(e))
            time.sleep(10)

    logger.debug("Done deleting for " + str(website_id))
def _index(self, docs):
    """Bulk-index ``docs``, retrying until the bulk request succeeds.

    The payload is built with ``ElasticSearchEngine.create_bulk_index_string``
    and sent through ``self.es.bulk`` to ``self.index_name``. The request is
    routed using the ``website_id`` of the first document only, so this
    presumably expects all of ``docs`` to belong to one website -- verify
    against callers.

    Any exception raised while building or sending the request is logged and
    the attempt is repeated after a 10 second pause; there is no retry limit.

    :param docs: sequence of document dicts, each carrying a ``website_id`` key
                 (only the first one's is read here).
    :return: None. An empty ``docs`` is a no-op.
    """
    if not docs:
        # Nothing to send. Without this guard, docs[0] below raises
        # IndexError inside the retry loop, the broad except swallows it,
        # and the method sleeps and retries forever.
        return

    while True:
        try:
            logger.debug("Indexing " + str(len(docs)) + " docs")
            bulk_string = ElasticSearchEngine.create_bulk_index_string(docs)
            self.es.bulk(
                body=bulk_string,
                index=self.index_name,
                doc_type="file",
                request_timeout=30,
                routing=docs[0]["website_id"],
            )
            break
        except Exception as e:
            logger.error("Error in _index: " + str(e) + ", retrying")
            time.sleep(10)