def _get_ids_worker_by_taxid(args):
    """Return the ``_id`` of every indexed document matching one taxid.

    ``args`` is a single 3-item sequence ``(es_kwargs, taxid, step)`` rather
    than three separate parameters (presumably so the function can be handed
    to a ``map``-style worker pool -- confirm against the caller):

    * ``es_kwargs`` -- keyword arguments forwarded to ``ESIndexer(...)``.
    * ``taxid``     -- value matched against the ``taxid`` field.
    * ``step``      -- used as the query ``size`` (maximum hits requested).

    Raises ``AssertionError`` when the number of ids collected differs from
    the total hit count reported by the search result, e.g. when ``step`` is
    smaller than the number of matching documents.
    """
    # Imports are kept local to the function, as in the original.
    from utils.es import ESIndexer
    from pyes import TermQuery

    (es_kwargs, taxid, step) = args

    query = TermQuery()
    query.add('taxid', taxid)
    query.fields = []   # request no fields; only each hit's '_id' is read below
    query.size = step

    indexer = ESIndexer(**es_kwargs)
    hits = indexer.conn.search(query)

    doc_ids = []
    for hit in hits:
        doc_ids.append(hit['_id'])

    # Sanity check: everything the index reports as matching was collected.
    assert len(doc_ids) == hits.total
    return doc_ids
def post_verify_changes(self, changes):
    """Print a verification report after ``changes`` were applied to the index.

    ``changes`` is a mapping with these keys (as used below):

    * ``'timestamp'`` -- datetime-like object (must provide
      ``utctimetuple()``) stamped on the added/updated docs.
    * ``'add'``       -- list of document ids that were added.
    * ``'update'``    -- list of dicts, each carrying an ``'_id'`` key.
    * ``'delete'``    -- collection of ids that were removed.

    Four checks are run and their outcome is printed (nothing is returned):

    1. the first timestamp bucket holds exactly ``len(add) + len(update)`` docs;
    2. none of the deleted ids can still be fetched;
    3. the per-timestamp counts add up to the total document count;
    4. the docs carrying the new ``_timestamp`` are exactly the added and
       updated ids.

    Raises ``AssertionError`` if the first entry of the timestamp stats does
    not carry ``changes['timestamp']``.
    """
    # Function-scope import: keeps this method self-contained.
    import calendar

    target = GeneDocESBackend(self)
    _timestamp = changes['timestamp']
    # Entries are indexed as (timestamp, count); the first entry is expected
    # to be the bucket for this run (presumably the newest -- confirm against
    # get_timestamp_stats()).
    ts_stats = self.get_timestamp_stats()

    if changes['add'] or changes['update']:
        print('Verifying "add" and "update"...', end='')
        assert ts_stats[0][0] == _timestamp, "{} != {}".format(ts_stats[0][0], _timestamp)
        _cnt = ts_stats[0][1]
        _cnt_add_update = len(changes['add']) + len(changes['update'])
        if _cnt == _cnt_add_update:
            print('...{}=={}...OK'.format(_cnt, _cnt_add_update))
        else:
            print('...{}!={}...ERROR!!!'.format(_cnt, _cnt_add_update))

    if changes['delete']:
        print('Verifying "delete"...', end='')
        _res = target.mget_from_ids(changes['delete'])
        # Count the truthy results, i.e. deleted ids that are still retrievable.
        _cnt = sum(1 for x in _res if x)
        if _cnt == 0:
            print('...{}==0...OK'.format(_cnt))
        else:
            print('...{}!=0...ERROR!!!'.format(_cnt))

    print("Verifying all docs have timestamp...", end='')
    _cnt = sum(x[1] for x in ts_stats)
    _cnt_all = self.count()['count']
    if _cnt == _cnt_all:
        print('{}=={}...OK'.format(_cnt, _cnt_all))
    else:
        print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(_cnt_all, _cnt))

    print("Verifying all new docs have updated timestamp...")
    # Convert the timestamp to UTC epoch milliseconds.  calendar.timegm()
    # reads the struct_time as UTC, so the result does not depend on the
    # local timezone of the machine running the check.  (The previous code
    # used time.mktime() and subtracted a hard-coded 8 hours, which is only
    # correct on a US/Pacific host.)
    ts = calendar.timegm(_timestamp.utctimetuple()) * 1000
    q = TermQuery()
    q.add('_timestamp', ts)
    cur = self.doc_feeder(query=q, fields=[], step=10000)
    _li1 = sorted(changes['add'] + [x['_id'] for x in changes['update']])
    _li2 = sorted([x['_id'] for x in cur])
    if _li1 == _li2:
        print("{}=={}...OK".format(len(_li1), len(_li2)))
    else:
        print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(len(_li1), len(_li2)))