def _update_with_es(self):
    """Scroll over ``self.es_index`` and pass every page of hits to ``_bulk_es_mongo``.

    Opens a scan-type scrolling search on ``self.es`` (restricted to
    ``self.query_type`` documents, ``self.bulk_size`` per page), then keeps
    pulling pages until one comes back empty. Progress is drawn on a
    ``ShadyBar`` sized to the hit total reported by the initial search.
    """
    search_args = dict(
        index=self.es_index,
        doc_type=self.query_type,
        scroll='1m',
        search_type='scan',
        size=self.bulk_size,
    )
    page = self.es.search(**search_args)
    scroll_id = page['_scroll_id']
    total = page['hits']['total']
    print("docs: {}".format(total))

    progress = ShadyBar(
        "clone",
        suffix='%(percent)d%% - %(index)d [%(elapsed_td)s / %(eta_td)s]',
        max=total,
    )

    processed = 0
    # Seeded with the total so the loop is skipped entirely for an empty
    # index; afterwards it holds the size of the most recent page.
    batch_size = total
    while batch_size > 0:
        page = self.es.scroll(scroll_id=scroll_id, scroll='1m')
        # Each scroll response carries the id to use for the next request.
        scroll_id = page['_scroll_id']
        batch = page['hits']['hits']
        batch_size = len(batch)

        if batch:
            self._bulk_es_mongo(batch)

        processed += batch_size
        progress.goto(processed)

    print('\nDone !')
def _copy_data(self):
    """Copy all documents from ``self.source_index`` to the target cluster.

    Opens a scan-type scrolling search on ``self.source_es`` (optionally
    sorted by ``self.source_sort``), converts every page of hits into bulk
    actions with ``self._bulk_hits`` and writes them to ``self.target_es``
    through ``streaming_bulk``. After each page that indexed at least one
    document, ``self.target_index`` is refreshed. Progress is shown on a
    ``ShadyBar`` and mirrored to ``self.logger``.

    A ``BulkIndexError`` raised while a page is being written is printed
    and the copy continues with the next page.
    """
    search_extras = {}
    if self.source_sort:
        search_extras['sort'] = self.source_sort

    scroll = self.source_es.search(
        index=self.source_index,
        scroll='1m',
        search_type='scan',
        size=self.bulk_size,
        version=True,
        timeout='60s',
        **search_extras
    )
    sid = scroll['_scroll_id']
    total_size = scroll['hits']['total']
    # Seeded with the total so an empty index skips the loop; afterwards it
    # holds the size of the most recently fetched page.
    hits_size = total_size
    dealt_size = 0

    print("docs: " + str(total_size))
    self.logger.info("docs: " + str(total_size))

    suffix = '%(percent)d%% - %(index)d [%(elapsed_td)s / %(eta_td)s]'
    bar = ShadyBar("clone", suffix=suffix, max=total_size)

    while hits_size > 0:
        scroll = self.source_es.scroll(scroll_id=sid, scroll='1m')
        # Each scroll response carries the id to use for the next request.
        sid = scroll['_scroll_id']
        hits = scroll['hits']['hits']
        hits_size = len(hits)

        actions = self._bulk_hits(hits)
        if len(actions) > 0:
            ok_count = 0
            try:
                # streaming_bulk is lazy: the bulk requests are sent, and
                # BulkIndexError is raised, while the generator is being
                # consumed. The loop therefore has to live inside the try,
                # otherwise the handler below can never fire.
                results = streaming_bulk(
                    client=self.target_es,
                    actions=actions,
                    timeout='60s',
                )
                for ok, item in results:
                    if ok:
                        ok_count += 1
                    else:
                        print(item)
            except BulkIndexError as err:
                # Best effort: report the failed batch and keep copying.
                print(err)

            # Refresh the target index once something from this page
            # was actually indexed.
            if ok_count > 0:
                self.target_es.indices.refresh(index=self.target_index)

        dealt_size += hits_size
        bar.goto(dealt_size)
        self.logger.info("dealt: " + str(dealt_size) + " / " + str(total_size))

    print('\nDone !')
    self.logger.info("Done ! \n\n")