Exemple #1
0
 def _update_with_es(self):
     """Scroll through ``self.es_index`` and pass every batch of hits on.

     An initial search opens a one-minute scroll window (search type
     ``scan``); only its scroll id and total hit count are read here.
     Each following scroll page is handed to ``self._bulk_es_mongo``
     while a ``ShadyBar`` progress bar tracks how many hits were seen.
     The loop ends once a scroll page comes back with no hits.
     """
     search_params = dict(
         index=self.es_index,
         doc_type=self.query_type,
         scroll='1m',
         search_type='scan',
         size=self.bulk_size,
     )
     page = self.es.search(**search_params)
     scroll_id = page['_scroll_id']
     total_docs = page['hits']['total']
     print("docs: %s" % (total_docs,))

     progress = ShadyBar(
         "clone",
         suffix='%(percent)d%% - %(index)d [%(elapsed_td)s / %(eta_td)s]',
         max=total_docs,
     )

     processed = 0
     # Seeded with the total so the loop is entered only when the
     # initial search reported at least one document.
     batch_len = total_docs
     while batch_len > 0:
         page = self.es.scroll(scroll_id=scroll_id, scroll='1m')
         # Carry the scroll id returned by this page into the next request.
         scroll_id = page['_scroll_id']
         batch = page['hits']['hits']
         batch_len = len(batch)
         if batch_len:
             self._bulk_es_mongo(batch)
         processed += batch_len
         progress.goto(processed)

     print('\nDone !')
 def _copy_data(self):
     """Copy documents from the source index into the target cluster.

     Opens a one-minute scroll on ``self.source_index`` (search type
     ``scan``, with versions requested and an optional sort taken from
     ``self.source_sort``). Every scroll page is converted to bulk
     actions by ``self._bulk_hits`` and written to ``self.target_es``
     with ``streaming_bulk``. Progress is shown on a ``ShadyBar`` and
     written to ``self.logger``.

     Bulk failures are printed and the copy continues with the next
     scroll page. ``self.target_index`` is refreshed after every batch
     in which at least one action succeeded.
     """
     ss_kw = {}
     # Only forward a sort clause when one is configured.
     if self.source_sort:
         ss_kw['sort'] = self.source_sort
     scroll = self.source_es.search(index=self.source_index,
                                    scroll='1m',
                                    search_type='scan',
                                    size=self.bulk_size,
                                    version=True,
                                    timeout='60s',
                                    **ss_kw)
     sid = scroll['_scroll_id']
     total_size = scroll['hits']['total']
     # Seeded with the total so the loop is entered only when the
     # initial search reported at least one document.
     hits_size = total_size
     dealt_size = 0
     print("docs: " + str(total_size))
     self.logger.info("docs: %s", total_size)
     suffix = '%(percent)d%% - %(index)d [%(elapsed_td)s / %(eta_td)s]'
     bar = ShadyBar("clone", suffix=suffix, max=total_size)
     while hits_size > 0:
         scroll = self.source_es.scroll(scroll_id=sid, scroll='1m')
         # Carry the scroll id returned by this page into the next request.
         sid = scroll['_scroll_id']
         hits = scroll['hits']['hits']
         hits_size = len(hits)
         actions = self._bulk_hits(hits)
         if len(actions) > 0:
             ok_num = 0
             # streaming_bulk is a lazy generator: it sends nothing and
             # raises nothing until it is iterated, so the iteration
             # itself has to sit inside the try block for the
             # BulkIndexError handler to be reachable.
             try:
                 for ok, item in streaming_bulk(client=self.target_es,
                                                actions=actions,
                                                timeout='60s'):
                     if ok:
                         ok_num += 1
                     else:
                         print(item)
             except BulkIndexError as err:
                 # Best effort: report the failure and move on to the
                 # next scroll page instead of aborting the whole copy.
                 print(err)
             # Refresh the target index once something was written.
             if ok_num > 0:
                 self.target_es.indices.refresh(index=self.target_index)
         # dealt size
         dealt_size += hits_size
         bar.goto(dealt_size)
         self.logger.info("dealt: %s / %s", dealt_size, total_size)
     print('\nDone !')
     self.logger.info("Done ! \n\n")