Code Example #1
    async def get_page_from_tid(self):
        conn = await self.txn.get_connection()
        clear_conn_statement_cache(conn)
        records = []
        queried_tid = self.last_tid
        async with self.txn._lock:
            start = timeit.default_timer()

            results = await conn.fetch(GET_OBS_BY_TID,
                                       queried_tid,
                                       timeout=TIMEOUT_PERIOD)
            stop = timeit.default_timer()
            duration = stop - start
            logger.warning(
                f"----Duration to fetch Objects by TID {duration:.2f}s")

            for record in results:
                if record['zoid'] in (ROOT_ID, TRASHED_ID,
                                      self.container._p_oid):
                    continue
                records.append(record)
                self.last_tid = record['tid']
                self.last_zoid = record['zoid']

        if len(records) == 0:
            if len(self.last_result_set) > 0:
                # now we have zero, increment, but only once
                self.last_tid = self.last_tid + 1
                logger.warning('Incremented last tid by one')
        self.last_result_set = records
        return records
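
A minimal, self-contained sketch of the tid-cursor paging pattern used above, with an in-memory `rows` list and a `fetch_by_tid` helper (both hypothetical) standing in for the `GET_OBS_BY_TID` query, assumed here to behave like "tid >= $1 ORDER BY tid, zoid LIMIT page_size":

PAGE_SIZE = 3

rows = [  # (tid, zoid) pairs, ordered by tid then zoid
    (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'),
]

def fetch_by_tid(last_tid, limit=PAGE_SIZE):
    # hypothetical stand-in for GET_OBS_BY_TID
    return [r for r in rows if r[0] >= last_tid][:limit]

last_tid = 0
while True:
    queried_tid = last_tid
    page = fetch_by_tid(queried_tid)
    if not page:
        break
    for tid, zoid in page:
        last_tid = tid  # the cursor follows the newest tid seen
    print(page)  # rows sharing the boundary tid may appear twice
    if last_tid == queried_tid:
        # a page holding a single tid would repeat forever with a ">="
        # query; bump the cursor once (the real code first drains that
        # tid via GET_ALL_FOR_TID, see Code Example #8)
        last_tid += 1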
Code Example #2
    async def process_object(self, ob):
        '''
        - if doc does not exist:
            - record it
            - do a complete index
        - if doc exists:
            - if a diff mapping exists
                - update fields in diff on doc
            - else, do nothing
            - remove it from the list of existing doc ids
        '''
        clear_conn_statement_cache(await ob._p_jar.get_connection())
        full = False
        if ob.uuid not in self.existing:
            self.missing.append(ob.uuid)
            full = True
        else:
            self.existing.remove(ob.uuid)
        await self.index_object(ob, full=full)
        self.processed += 1

        if IFolder.providedBy(ob):
            await self.process_folder(ob)

        if not IContainer.providedBy(ob):
            # release annotation references so they can be garbage
            # collected between batches
            del ob.__annotations__
        del ob
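
The bookkeeping this method relies on is easier to see in isolation. A sketch with a hypothetical `classify` helper and made-up ids: uuids missing from the index are recorded and fully indexed, uuids found in both places are removed from `existing`, so whatever remains afterwards exists only in the index and is a candidate for orphan cleanup.

def classify(db_ids, indexed_ids):
    # `existing` starts as the ids currently in the search index
    existing = set(indexed_ids)
    missing = []
    for uuid in db_ids:
        if uuid not in existing:
            missing.append(uuid)   # needs a complete index
        else:
            existing.remove(uuid)  # present in both DB and index
    return missing, existing       # leftover ids exist only in the index

missing, leftover = classify(
    db_ids=['a', 'b', 'c'], indexed_ids=['b', 'c', 'x'])
assert missing == ['a'] and leftover == {'x'}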
Code Example #3
    async def get_db_page_of_keys(self, oids, page=1, page_size=PAGE_SIZE):
        conn = await self.txn.get_connection()
        clear_conn_statement_cache(conn)
        async with self.txn._lock:
            return await conn.fetch(BATCHED_GET_CHILDREN_BY_PARENT,
                                    oids,
                                    page_size, (page - 1) * page_size,
                                    timeout=TIMEOUT_PERIOD)
Code Example #4
    async def get_db_page_of_keys(self, oids, page=1, page_size=PAGE_SIZE):
        conn = await self.txn.get_connection()
        clear_conn_statement_cache(conn)
        keys = []
        async with self.txn._lock:
            # page is 1-based: LIMIT page_size OFFSET (page - 1) * page_size
            for record in await conn.fetch(
                    BATCHED_GET_CHILDREN_BY_PARENT, oids,
                    page_size, (page - 1) * page_size):
                keys.append(record['zoid'])
        return keys
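
Both variants above page with a 1-based page number translated into an offset. A small sketch of that arithmetic against a plain list, where the slicing stands in for the LIMIT/OFFSET clause assumed to be in `BATCHED_GET_CHILDREN_BY_PARENT`:

PAGE_SIZE = 4
zoids = [f'zoid-{i}' for i in range(10)]

def page_of(items, page=1, page_size=PAGE_SIZE):
    offset = (page - 1) * page_size  # page 1 starts at offset 0
    return items[offset:offset + page_size]

assert page_of(zoids, page=1) == ['zoid-0', 'zoid-1', 'zoid-2', 'zoid-3']
assert page_of(zoids, page=3) == ['zoid-8', 'zoid-9']  # short final page
assert page_of(zoids, page=4) == []  # an empty page ends the caller's loop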
Code Example #5
    async def check_orphans(self):
        logger.warning(
            f'Checking orphans on container {self.container.id}',
            extra={  # noqa
                'account': self.container.id
            })
        conn = await self.txn.get_connection()
        checked = 0
        async for es_batch, index_name in self.iter_batched_es_keys():
            checked += len(es_batch)
            clear_conn_statement_cache(conn)
            async with self.txn._lock:
                records = await conn.fetch(SELECT_BY_KEYS, es_batch)
            db_batch = set()
            for record in records:
                db_batch.add(record['zoid'])
            orphaned = [k for k in set(es_batch) - db_batch]
            if checked % 10000 == 0:
                logger.warning(f'Checked orphans: {checked}')
            if orphaned:
                # these are keys that are in ES but not in DB, so we
                # should remove them
                self.orphaned |= set(orphaned)
                logger.warning(f'deleting orphaned {len(orphaned)}')
                conn_es = await self.utility.conn.transport.get_connection()
                # delete by query for orphaned keys...
                async with conn_es.session.post(
                    join(conn_es.base_url.human_repr(), index_name,
                         '_delete_by_query'),
                    headers={'Content-Type': 'application/json'},
                    data=json.dumps({"query": {
                        "terms": {
                            "_id": orphaned
                        }
                    }})) as resp:  # noqa
                    try:
                        data = await resp.json()
                        if data['deleted'] != len(orphaned):
                            logger.warning(
                                f'Was only able to clean up {data["deleted"]} '
                                f'instead of {len(orphaned)}')
                    except Exception:
                        logger.warning(
                            'Could not parse delete by query response. '
                            'Vacuuming might not be working')
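
The orphan computation and the request body can be checked without a database or an Elasticsearch node. A minimal sketch with plain collections standing in for the ES key batch and the fetched rows (all values hypothetical):

import json

es_batch = ['k1', 'k2', 'k3', 'k4']          # ids currently in the index
db_batch = {'k1', 'k3'}                      # zoids returned by SELECT_BY_KEYS

orphaned = sorted(set(es_batch) - db_batch)  # in ES but not in the DB
assert orphaned == ['k2', 'k4']

# the body posted to <index>/_delete_by_query above
body = json.dumps({'query': {'terms': {'_id': orphaned}}})
print(body)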
Code Example #6
    async def check_orphans(self):
        logger.warning(f'Checking orphans on container {self.container.id}',
                       extra={'account': self.container.id})
        conn = await self.txn.get_connection()
        checked = 0
        async for es_batch in self.iter_batched_es_keys():
            checked += len(es_batch)
            clear_conn_statement_cache(conn)
            async with self.txn._lock:
                records = await conn.fetch(SELECT_BY_KEYS,
                                           es_batch,
                                           timeout=TIMEOUT_PERIOD)
            db_batch = set()
            for record in records:
                db_batch.add(record['zoid'])
            orphaned = [k for k in set(es_batch) - db_batch]
            if checked % 10000 == 0:
                logger.warning(f'Checked orphans: {checked}')
            if orphaned:
                # these are keys that are in ES but not in DB, so we
                # should remove them
                self.orphaned |= set(orphaned)
                logger.warning(f'deleting orphaned {len(orphaned)}')
                conn_es = await self.utility.conn.transport.get_connection()
                # delete by query for orphaned keys...
                async with conn_es._session.post(
                    '{}{}/_delete_by_query'.format(
                        conn_es._base_url.human_repr(), self.index_name),
                    data=json.dumps({'query': {
                        'terms': {
                            '_id': orphaned
                        }
                    }})) as resp:
                    try:
                        data = await resp.json()
                        if data['deleted'] != len(orphaned):
                            logger.warning(
                                f'Was only able to clean up {data["deleted"]} '
                                f'instead of {len(orphaned)}')
                    except Exception:
                        logger.warning(
                            'Could not parse delete by query response. '
                            'Vacuuming might not be working')
Code Example #7
    async def get_page_from_tid(self):
        conn = await self.txn.get_connection()
        clear_conn_statement_cache(conn)
        keys = []
        queried_tid = self.last_tid
        async with self.txn._lock:
            records = await conn.fetch(GET_OBS_BY_TID, queried_tid)
            for record in records:
                if record['zoid'] in (ROOT_ID, TRASHED_ID,
                                      self.container._p_oid):
                    continue
                keys.append(record['zoid'])
                self.last_tid = record['tid']
                self.last_zoid = record['zoid']
        if len(keys) == 0:
            if len(self.last_result_set) > 0:
                # now we have zero, increment, but only once
                self.last_tid = self.last_tid + 1
        self.last_result_set = keys
        return keys
Code Example #8
    async def iter_paged_db_keys(self, oids):
        if self.use_tid_query:
            queried_tid = self.last_tid
            records = await self.get_page_from_tid()
            while len(records) > 0:
                yield records
                if self.last_tid == queried_tid:
                    conn = await self.txn.get_connection()
                    logger.warning(
                        f'Getting all keys from tid {self.last_tid}')
                    # we're stuck on same tid, get all for this tid
                    # and then move on...
                    clear_conn_statement_cache(conn)
                    results = await conn.fetch(GET_ALL_FOR_TID, self.last_tid,
                                               self.last_zoid)
                    while len(results) > 0:
                        records = []
                        for record in results:
                            if record['zoid'] in (ROOT_ID, TRASHED_ID,
                                                  self.container._p_oid):
                                continue
                            records.append(record)
                            self.last_zoid = record['zoid']
                        yield records
                        clear_conn_statement_cache(conn)
                        results = await conn.fetch(GET_ALL_FOR_TID,
                                                   self.last_tid,
                                                   self.last_zoid)
                    self.last_tid = self.last_tid + 1
                queried_tid = self.last_tid
                records = await self.get_page_from_tid()

        else:
            page_num = 1
            page = await self.get_db_page_of_keys(oids, page_num)
            while page:
                yield page
                async for sub_page in self.iter_paged_db_keys(
                    [r['zoid'] for r in page]):
                    yield sub_page
                page_num += 1
                page = await self.get_db_page_of_keys(oids, page_num)
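
The else branch performs a depth-first, paged traversal: yield a page of children, recurse into those children, then fetch the next sibling page. A self-contained sketch with an in-memory parent-to-children mapping standing in for `BATCHED_GET_CHILDREN_BY_PARENT` (all data hypothetical):

PAGE_SIZE = 2

children = {
    'root': ['a', 'b', 'c'],
    'a': ['a1', 'a2'],
    'b': ['b1'],
}

def get_page_of_keys(oids, page, page_size=PAGE_SIZE):
    kids = [c for oid in oids for c in children.get(oid, [])]
    offset = (page - 1) * page_size
    return kids[offset:offset + page_size]

def iter_paged_keys(oids):
    page_num = 1
    page = get_page_of_keys(oids, page_num)
    while page:
        yield page
        # descend into this page before fetching the next sibling page
        yield from iter_paged_keys(page)
        page_num += 1
        page = get_page_of_keys(oids, page_num)

print(list(iter_paged_keys(['root'])))
# [['a', 'b'], ['a1', 'a2'], ['b1'], ['c']]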
Code Example #9
    async def get_object(self, oid):
        if oid in self.cache:
            return self.cache[oid]

        try:
            result = self.txn._manager._hard_cache.get(oid, None)
        except AttributeError:
            # fall back to the module-level HARD_CACHE when the manager
            # has no _hard_cache attribute
            from guillotina.db.transaction import HARD_CACHE  # pylint: disable=E0611
            result = HARD_CACHE.get(oid, None)
        if result is None:
            clear_conn_statement_cache(await self.txn.get_connection())
            result = await self.txn._cache.get(oid=oid)

        if result is None:
            result = await self.tm._storage.load(self.txn, oid)

        obj = reader(result)
        obj._p_jar = self.txn
        if result['parent_id']:
            obj.__parent__ = await self.get_object(result['parent_id'])
        return obj
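
`get_object` falls through progressively slower layers: the local memo cache, the transaction's hard cache, the transaction cache, and finally storage. A sketch of that lookup order with plain dicts standing in for each layer (names and data hypothetical):

def layered_get(oid, layers):
    for name, layer in layers:
        result = layer.get(oid)
        if result is not None:
            return name, result  # first layer that has the oid wins
    raise KeyError(oid)

layers = [
    ('memo', {}),                     # self.cache
    ('hard', {'o2': 'record-2'}),     # _hard_cache / HARD_CACHE
    ('txn', {}),                      # self.txn._cache
    ('storage', {'o1': 'record-1'}),  # self.tm._storage.load
]
assert layered_get('o2', layers) == ('hard', 'record-2')
assert layered_get('o1', layers) == ('storage', 'record-1')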
Code Example #10
    async def check_orphans(self):
        logger.warning(
            f'Checking orphans on container {self.container.id}',
            extra={
                'account': self.container.id
            })
        conn = await self.txn.get_connection()
        checked = 0
        async for es_batch in self.iter_batched_es_keys():
            checked += len(es_batch)
            clear_conn_statement_cache(conn)
            async with self.txn._lock:
                records = await conn.fetch(SELECT_BY_KEYS, es_batch)
            db_batch = set()
            for record in records:
                db_batch.add(record['zoid'])
            orphaned = [k for k in set(es_batch) - db_batch]
            if checked % 10000 == 0:
                logger.warning(f'Checked orphans: {checked}')
            if orphaned:
                # these are keys that are in ES but not in DB, so we
                # should remove them
                self.orphaned.extend(orphaned)
                logger.warning(f'deleting orphaned {len(orphaned)}')
                conn_es = await self.utility.conn.transport.get_connection()
                # delete by query for orphaned keys...
                async with conn_es.session.post(
                        join(conn_es.base_url.human_repr(),
                             self.index_name, '_delete_by_query'),
                        headers={
                            'Content-Type': 'application/json'
                        },
                        data=json.dumps({
                            'query': {
                                'terms': {
                                    'uuid': orphaned
                                }
                            }
                        })) as resp:
                    # fire and forget: the response is not inspected here
                    pass
Code Example #11
    async def iter_paged_db_keys(self, oids):
        if self.use_tid_query:
            queried_tid = self.last_tid
            records = await self.get_page_from_tid()
            retry = False
            while len(records) > 0 or retry:
                try:
                    yield records
                    retry = False

                    if self.last_tid == queried_tid:
                        conn = await self.txn.get_connection()
                        logger.warning(
                            f'Getting all keys for tid {self.last_tid}')
                        # we're stuck on same tid, get all for this tid
                        # and then move on...
                        clear_conn_statement_cache(conn)
                        start = timeit.default_timer()
                        results = await conn.fetch(GET_ALL_FOR_TID,
                                                   self.last_tid,
                                                   self.last_zoid,
                                                   timeout=TIMEOUT_PERIOD)
                        stop = timeit.default_timer()
                        duration = stop - start
                        logger.warning(f" Got all for TID in {duration:.2f}s")
                        while len(results) > 0:
                            records = []
                            for record in results:
                                if record['zoid'] in (ROOT_ID, TRASHED_ID,
                                                      self.container._p_oid):
                                    continue
                                records.append(record)
                                self.last_zoid = record['zoid']
                            yield records
                            clear_conn_statement_cache(conn)
                            start = timeit.default_timer()
                            results = await conn.fetch(GET_ALL_FOR_TID,
                                                       self.last_tid,
                                                       self.last_zoid,
                                                       timeout=TIMEOUT_PERIOD)
                            stop = timeit.default_timer()
                            duration = stop - start
                            logger.warning(
                                f"More results - Got all for TID in {duration:.2f}s"
                            )
                        self.last_tid = self.last_tid + 1

                    queried_tid = self.last_tid
                    records = await self.get_page_from_tid()
                except Exception:
                    logger.error('Could not get keys for tid, retrying...',
                                 exc_info=True)
                    retry = True

        else:
            page_num = 1
            page = await self.get_db_page_of_keys(oids, page_num)
            while page:
                yield page
                async for sub_page in self.iter_paged_db_keys(
                    [r['zoid'] for r in page]):
                    yield sub_page
                page_num += 1
                page = await self.get_db_page_of_keys(oids, page_num)
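
The retry guard in the tid branch is worth isolating: a failure during the fetch sets `retry` so the loop runs again instead of ending the vacuum, and `retry` must be initialized before the first loop test (fixed above). A minimal sketch with a hypothetical `flaky_fetch` standing in for the queries; note that the page yielded before a failure is yielded again on the retry pass, so consumers must tolerate duplicates:

import logging

logger = logging.getLogger(__name__)
attempts = {'n': 0}

def flaky_fetch():
    # hypothetical: fails on the second call, runs dry after the third
    attempts['n'] += 1
    if attempts['n'] == 2:
        raise ConnectionError('transient failure')
    return [f"record-{attempts['n']}"] if attempts['n'] < 4 else []

def iter_pages():
    retry = False  # must exist before the first `while` test
    records = flaky_fetch()
    while len(records) > 0 or retry:
        try:
            yield records
            retry = False
            records = flaky_fetch()
        except Exception:
            logger.error('Could not get page, retrying...', exc_info=True)
            retry = True

print(list(iter_pages()))
# [['record-1'], ['record-1'], ['record-3']] (the failed page repeats)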