Example #1
0
 def run(self, updates_since: datetime=datetime.min):
     """
     Performs a retrieval synchronously on the calling thread.
     :param updates_since: only fetch updates from this time onwards (the default fetches all updates)
     """
     self._do_retrieval(localise_to_utc(updates_since))
Example #2
0
    def run(self, updates_since: datetime=datetime.min):
        """
        Starts the scheduler so retrievals happen periodically.
        :param updates_since: only fetch updates from this time onwards (the default fetches all updates)
        :raises RuntimeError: if this manager is already running
        """
        with self._state_lock:
            if self._running:
                raise RuntimeError("Already running")
            self._running = True
        # Only record the starting point once we know we are the single runner.
        # Previously this was assigned before the check above, so a duplicate
        # `run` call clobbered the running instance's `_updates_since` even
        # though it went on to raise.
        self._updates_since = localise_to_utc(updates_since)
        self._scheduler.start()
Example #3
0
    def get_most_recent(self) -> List[Update]:
        """
        Gets the updates in the collection with the most recent timestamp.

        O(n) operation (two passes over the collection).
        :return: the updates in the collection with the most recent timestamp
        :raises ValueError: if there are no updates in the collection
        """
        if len(self) == 0:
            raise ValueError("No updates in collection")

        # Two passes: find the most recent (UTC-localised) timestamp, then
        # collect every update that carries it. The previous single-pass
        # implementation seeded the result with a sentinel `Update`, which
        # could leak into the returned list when a real update's localised
        # timestamp equalled the sentinel's (its `assert` guard compared an
        # aware timestamp against naive `datetime.min`, so it never fired,
        # and asserts are stripped under `-O` anyway).
        most_recent_timestamp = max(localise_to_utc(update.timestamp) for update in self)
        return [update for update in self
                if localise_to_utc(update.timestamp) == most_recent_timestamp]
    def get_all_since(self, since: datetime) -> UpdateCollection:
        """
        Gets all updates made since the given time, running the data-object and
        metadata modification queries in parallel threads and merging the results.
        :param since: get updates from this time onwards
        :return: the updates, packaged as an `UpdateCollection`
        :raises Exception: re-raises the first exception raised by a query thread
        """
        # iRODS works with Epoch time therefore ensure `since` is localised as UTC
        since = localise_to_utc(since)

        if since < _EPOCH:
            since = _EPOCH

        since_timestamp = str(int(since.timestamp()))
        until_timestamp = str(_MAX_IRODS_TIMESTAMP)

        arguments = [since_timestamp, until_timestamp]
        aliases = [MODIFIED_DATA_QUERY_ALIAS, MODIFIED_METADATA_QUERY_ALIAS]
        all_updates = []  # type: List[DataObjectUpdate]
        semaphore = Semaphore(0)
        # NOTE: `Optional(Exception)` was a call expression, not a type — the
        # correct subscription syntax is `Optional[Exception]`.
        error = None    # type: Optional[Exception]

        def run_threaded(alias: str):
            """Runs the specific query with the given alias, extending `all_updates`."""
            updates_query = PreparedSpecificQuery(alias, arguments)
            try:
                started_at = time.monotonic()
                updates = self._get_with_prepared_specific_query(updates_query, zone=self.zone)
                # Lazy %-style logging args: the message is only formatted if
                # this log record is actually emitted.
                logging.info("Took %f seconds (wall time) to get and then parse %d iRODS updates using `%s` query",
                             time.monotonic() - started_at, len(updates), alias)
                all_updates.extend(list(updates))
            except Exception as e:
                nonlocal error
                error = e
            finally:
                # Always release, even on failure, so the waiting thread below
                # cannot deadlock on `semaphore.acquire()`.
                semaphore.release()

        for alias in aliases:
            Thread(target=run_threaded, args=(alias, )).start()
        for _ in range(len(aliases)):
            semaphore.acquire()
            if error is not None:
                raise error

        started_at = time.monotonic()
        combined_modifications = BatonUpdateMapper._combine_updates_for_same_entity(all_updates)
        logging.info("Took %f seconds (wall time) to merge %d updates related to %d data objects",
                     time.monotonic() - started_at, len(all_updates), len(combined_modifications))

        # Package modifications into `UpdateCollection`
        started_at = time.monotonic()
        updates = BatonUpdateMapper._data_object_updates_to_generic_update_collection(combined_modifications)
        logging.info("Took %f seconds (wall time) to convert %d updates to generic updates that can be stored in the "
                     "knowledge base", time.monotonic() - started_at, len(combined_modifications))

        return updates
from threading import Thread, Semaphore, Lock
from typing import List
from unittest.mock import MagicMock, call

from hgicommon.collections import Metadata
from hgijson.json.primitive import DatetimeISOFormatJSONDecoder

from cookiemonster.common.collections import UpdateCollection
from cookiemonster.common.helpers import localise_to_utc
from cookiemonster.common.models import Update
from cookiemonster.retriever.manager import PeriodicRetrievalManager, RetrievalManager, MEASURED_RETRIEVAL, \
    MEASURED_RETRIEVAL_STARTED_AT, MEASURED_RETRIEVAL_MOST_RECENT_RETRIEVED, MEASURED_RETRIEVAL_UPDATE_COUNT, \
    MEASURED_RETRIEVAL_DURATION
from cookiemonster.tests.retriever._stubs import StubUpdateMapper

# The earliest representable time, localised to UTC — retrieving "since" this
# point presumably fetches every update. TODO confirm against the tests below.
SINCE = localise_to_utc(datetime.min)
# Wall time (seconds) a stubbed retrieval is simulated to take — verify usage.
TIME_TAKEN_TO_DO_RETRIEVE = 1.0
# Period (seconds) between retrievals for the periodic-manager tests.
RETRIEVAL_PERIOD = 0.0001
# Starting value for the mocked monotonic clock.
CURRENT_MONOTONIC_TIME = 0
# Fixed value returned by the mocked wall clock.
CURRENT_CLOCK_TIME = datetime(1, 2, 3)


class _BaseRetrievalManagerTest(unittest.TestCase):
    """
    Base class for unit tests on `RetrievalManager` instances.
    """
    def setUp(self):
        self.update_mapper = StubUpdateMapper()
        self.logger = MagicMock()

        self.updates = UpdateCollection([