Esempio n. 1
0
 def _get_ids_worker_by_taxid(args):
     """Return the ``_id`` of every document whose ``taxid`` term matches.

     ``args`` is one packed ``(es_kwargs, taxid, step)`` tuple:

     * ``es_kwargs`` -- keyword arguments forwarded to ``ESIndexer(...)``.
     * ``taxid``     -- value matched against the ``taxid`` field.
     * ``step``      -- value assigned to the query's ``size``.

     Returns a list with the ``_id`` of each hit.

     Raises ``AssertionError`` when the number of ids collected differs from
     the total reported by the search result, i.e. the list is incomplete.
     """
     # Imported locally, as in the original; presumably so the function can be
     # shipped to a worker process without module-level imports -- TODO confirm.
     from utils.es import ESIndexer
     from pyes import TermQuery
     es_kwargs, taxid, step = args
     q = TermQuery()
     q.add('taxid', taxid)
     q.fields = []       # no fields requested; only the hit's _id is read below
     q.size = step
     esi = ESIndexer(**es_kwargs)
     res = esi.conn.search(q)
     xli = [doc['_id'] for doc in res]
     # Explicit raise instead of ``assert``: this is a data-completeness check
     # and must not vanish when Python runs with -O.
     total = res.total
     if len(xli) != total:
         raise AssertionError(
             "taxid {!r}: collected {} ids but search reports {} hits".format(
                 taxid, len(xli), total))
     return xli
Esempio n. 2
0
    def post_verify_changes(self, changes):
        """Print sanity checks comparing the index state against ``changes``.

        ``changes`` is a mapping read through these keys:

        * ``'timestamp'`` -- datetime-like object (``utctimetuple()`` is
          called on it) that the applied changes were stamped with.
        * ``'add'``       -- list of ids (concatenated with a list of ids).
        * ``'update'``    -- sequence of mappings, each carrying ``'_id'``.
        * ``'delete'``    -- ids passed to ``mget_from_ids``; none of them is
          expected to resolve to a truthy document.

        Four checks are reported on stdout as ``OK`` / ``ERROR!!!``:
        add+update count, deleted ids gone, every doc counted in the
        timestamp stats, and the ids stamped with ``changes['timestamp']``
        matching the add+update ids.

        Returns ``None``.  Raises ``AssertionError`` if the first entry of
        ``get_timestamp_stats()`` does not carry ``changes['timestamp']``.
        """
        # Local import so this method does not depend on a file-level import
        # of ``calendar`` being present.
        import calendar

        target = GeneDocESBackend(self)
        _timestamp = changes['timestamp']
        # Each entry is indexed as (timestamp, count) below; presumably the
        # first entry is the most recent timestamp -- TODO confirm.
        ts_stats = self.get_timestamp_stats()

        if changes['add'] or changes['update']:
            print('Verifying "add" and "update"...', end='')
            # Explicit raise rather than ``assert`` so the check survives -O.
            if not (ts_stats[0][0] == _timestamp):
                raise AssertionError("{} != {}".format(ts_stats[0][0], _timestamp))
            _cnt = ts_stats[0][1]
            _cnt_add_update = len(changes['add']) + len(changes['update'])
            if _cnt == _cnt_add_update:
                print('...{}=={}...OK'.format(_cnt, _cnt_add_update))
            else:
                print('...{}!={}...ERROR!!!'.format(_cnt, _cnt_add_update))
        if changes['delete']:
            print('Verifying "delete"...', end='')
            _res = target.mget_from_ids(changes['delete'])
            # Count the deleted ids that still resolve to a truthy document.
            _cnt = sum(1 for x in _res if x)
            if _cnt == 0:
                print('...{}==0...OK'.format(_cnt))
            else:
                print('...{}!=0...ERROR!!!'.format(_cnt))

        print("Verifying all docs have timestamp...", end='')
        _cnt = sum(x[1] for x in ts_stats)
        _cnt_all = self.count()['count']
        if _cnt == _cnt_all:
            print('{}=={}...OK'.format(_cnt, _cnt_all))
        else:
            print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(_cnt_all, _cnt))

        print("Verifying all new docs have updated timestamp...")
        # Epoch milliseconds of the timestamp's UTC fields.  The previous
        # ``time.mktime(...) - 8 * 3600`` produced this value only on a host
        # whose standard UTC offset is -8h (PST); ``calendar.timegm`` yields
        # the same number there and does not depend on the host timezone.
        ts = calendar.timegm(_timestamp.utctimetuple()) * 1000
        q = TermQuery()
        q.add('_timestamp', ts)
        cur = self.doc_feeder(query=q, fields=[], step=10000)
        _li1 = sorted(changes['add'] + [x['_id'] for x in changes['update']])
        _li2 = sorted(x['_id'] for x in cur)
        if _li1 == _li2:
            print("{}=={}...OK".format(len(_li1), len(_li2)))
        else:
            print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(len(_li1), len(_li2)))