Example #1
def create_unit_of_work(process_name, first_object_id, last_object_id):
    """ creates a unit_of_work record, inserts it into the DB and returns the id of the new document """
    source_collection = ProcessContext.get_source_collection(process_name)
    target_collection = ProcessContext.get_target_collection(process_name)
    logger = ProcessContext.get_logger(process_name)
    
    unit_of_work = UnitOfWorkEntry()
    unit_of_work.set_timestamp('UNIT_TEST')  # sentinel string in place of a real timeperiod
    unit_of_work.set_start_id(first_object_id)
    unit_of_work.set_end_id(last_object_id)
    unit_of_work.set_source_collection(source_collection)
    unit_of_work.set_target_collection(target_collection)
    unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
    unit_of_work.set_process_name(process_name)
    unit_of_work.set_number_of_retries(0)
    
    uow_id = unit_of_work_helper.insert(logger, unit_of_work)
    return uow_id
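
A minimal call sketch for the helper above, assuming ProcessContext is already configured for the process; the process name and the two id strings below are placeholders, not values from the source:

uow_id = create_unit_of_work('sample_process',
                             first_object_id='000000000000000000000001',
                             last_object_id='00000000000000000000000a')
print('inserted unit_of_work %s' % uow_id)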

Example #2
    def _process_single_document(self, document):
        """ inspects a UOW retrieved from the database; re-posts stale units or cancels those past their life support window """
        repost = False
        unit_of_work = UnitOfWorkEntry(document)
        process_name = unit_of_work.get_process_name()

        if unit_of_work.get_state() == UnitOfWorkEntry.STATE_INVALID:
            repost = True
        elif (
            unit_of_work.get_state() == UnitOfWorkEntry.STATE_IN_PROGRESS
            or unit_of_work.get_state() == UnitOfWorkEntry.STATE_REQUESTED
        ):
            last_activity = unit_of_work.get_started_at()
            if last_activity is None:
                last_activity = unit_of_work.get_created_at()

            if datetime.utcnow() - last_activity > timedelta(hours=REPOST_AFTER_HOURS):
                repost = True

        if repost:
            creation_time = unit_of_work.get_created_at()
            if datetime.utcnow() - creation_time < timedelta(hours=LIFE_SUPPORT_HOURS):
                unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
                unit_of_work.set_number_of_retries(unit_of_work.get_number_of_retries() + 1)
                unit_of_work_helper.update(self.logger, unit_of_work)
                self.publishers.get_publisher(process_name).publish(str(document["_id"]))

                self.logger.info(
                    "UOW marked for re-processing: process %s; id %s; attempt %d"
                    % (process_name, str(document["_id"]), unit_of_work.get_number_of_retries())
                )
                self.performance_ticker.increment()
            else:
                unit_of_work.set_state(UnitOfWorkEntry.STATE_CANCELED)
                unit_of_work_helper.update(self.logger, unit_of_work)
                self.logger.info(
                    "UOW transfered to STATE_CANCELED: process %s; id %s; attempt %d"
                    % (process_name, str(document["_id"]), unit_of_work.get_number_of_retries())
                )
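
The repost decision above distils into a small pure function, which makes the timeout logic easy to unit-test in isolation. The sketch below is a hypothetical refactoring: should_repost and the constant value are assumptions (the source only references the constant name), and states are passed as plain strings for brevity:

from datetime import datetime, timedelta

REPOST_AFTER_HOURS = 1    # assumed value; the source only uses the name

def should_repost(state, started_at, created_at, now=None):
    """ mirrors the example: invalid UOWs are always re-posted; in-progress or
        requested UOWs are re-posted once idle longer than REPOST_AFTER_HOURS """
    now = now or datetime.utcnow()
    if state == 'STATE_INVALID':
        return True
    if state in ('STATE_IN_PROGRESS', 'STATE_REQUESTED'):
        last_activity = started_at if started_at is not None else created_at
        return now - last_activity > timedelta(hours=REPOST_AFTER_HOURS)
    return False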

Example #3
    def compute_scope_of_processing(self, process_name, start_time, end_time, time_record):
        """ reads the source collection and identifies the slice of documents to process """
        source_collection_name = ProcessContext.get_source_collection(process_name)
        target_collection_name = ProcessContext.get_target_collection(process_name)
        source_collection = CollectionContext.get_collection(self.logger, source_collection_name)

        query = {AbstractModel.TIMESTAMP: {'$gte': start_time, '$lt': end_time}}
        asc_search = source_collection.find(spec=query, fields='_id').sort('_id', ASCENDING).limit(1)
        if asc_search.count() == 0:
            raise LookupError('No messages in timeperiod: %s:%s in collection %s'
                                % (start_time, end_time, source_collection_name))
        first_object_id = asc_search[0]['_id']

        dec_search = source_collection.find(spec=query, fields='_id').sort('_id', DESCENDING).limit(1)
        last_object_id = dec_search[0]['_id']

        unit_of_work = UnitOfWorkEntry()
        unit_of_work.set_timestamp(start_time)
        unit_of_work.set_start_id(str(first_object_id))
        unit_of_work.set_end_id(str(last_object_id))
        unit_of_work.set_start_timestamp(start_time)
        unit_of_work.set_end_timestamp(end_time)
        unit_of_work.set_created_at(datetime.utcnow())
        unit_of_work.set_source_collection(source_collection_name)
        unit_of_work.set_target_collection(target_collection_name)
        unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
        unit_of_work.set_process_name(process_name)
        unit_of_work.set_number_of_retries(0)

        try:
            uow_id = unit_of_work_helper.insert(self.logger, unit_of_work)
        except DuplicateKeyError as e:
            e.first_object_id = str(first_object_id)
            e.last_object_id = str(last_object_id)
            e.process_name = process_name
            e.timestamp = start_time
            raise e

        self.publishers.get_publisher(process_name).publish(str(uow_id))
        msg = 'Published: UOW %r for %r in timeperiod %r.' % (uow_id, process_name, start_time)
        self._log_message(INFO, process_name, time_record, msg)
        return unit_of_work
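
Because the method decorates the DuplicateKeyError with the offending scope before re-raising it, a caller can report the collision without re-querying the collection. A minimal handler sketch; broker and logger are hypothetical names, and the remaining variables are assumed to be in scope:

from pymongo.errors import DuplicateKeyError

try:
    uow = broker.compute_scope_of_processing(process_name, start_time, end_time, time_record)
except DuplicateKeyError as e:
    # attributes attached by the example above
    logger.warning('UOW [%s : %s] for %s at %s already exists; skipping publication'
                   % (e.first_object_id, e.last_object_id, e.process_name, e.timestamp))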

Example #4
    def create_unit_of_work(self, logger, process_name, first_object_id, last_object_id, timestamp):
        """ creates a unit_of_work record, inserts it into the DB and returns the id of the new document """
        unit_of_work = UnitOfWorkEntry()
        unit_of_work.set_timestamp(timestamp)
        unit_of_work.set_start_timestamp(timestamp)
        unit_of_work.set_end_timestamp(timestamp)
        unit_of_work.set_start_id(first_object_id)
        unit_of_work.set_end_id(last_object_id)
        unit_of_work.set_source_collection(None)
        unit_of_work.set_target_collection(None)
        unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
        unit_of_work.set_process_name(process_name)
        unit_of_work.set_number_of_retries(0)

        uow_id = unit_of_work_helper.insert(logger, unit_of_work)
        return uow_id

Example #5
    def insert_uow(self, process_name, start_time, end_time, iteration, time_record):
        """ creates a unit_of_work and inserts it into MongoDB
            @raise DuplicateKeyError: if a unit_of_work with the given parameters already exists """
        first_object_id = 0
        last_object_id = iteration

        unit_of_work = UnitOfWorkEntry()
        unit_of_work.set_timestamp(start_time)
        unit_of_work.set_start_id(first_object_id)
        unit_of_work.set_end_id(last_object_id)
        unit_of_work.set_start_timestamp(start_time)
        unit_of_work.set_end_timestamp(end_time)
        unit_of_work.set_created_at(datetime.utcnow())
        unit_of_work.set_source_collection(None)
        unit_of_work.set_target_collection(None)
        unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
        unit_of_work.set_process_name(process_name)
        unit_of_work.set_number_of_retries(0)

        try:
            uow_id = unit_of_work_helper.insert(self.logger, unit_of_work)
        except DuplicateKeyError as e:
            e.first_object_id = str(first_object_id)
            e.last_object_id = str(last_object_id)
            e.process_name = process_name
            e.timestamp = start_time
            raise e

        self.publishers.get_publisher(process_name).publish(str(uow_id))
        msg = 'Published: UOW %r for %r in timeperiod %r.' % (uow_id, process_name, start_time)
        self._log_message(INFO, process_name, time_record, msg)
        return unit_of_work
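
Here start_id and end_id form a synthetic [0 : iteration] range rather than real ObjectIds, so a different iteration value yields a distinct unique key for the same timeperiod. One plausible caller-side recovery (an assumption, not confirmed by the source) is to retry with an incremented iteration when the insert collides; worker is a hypothetical handle to the owning instance:

from pymongo.errors import DuplicateKeyError

for iteration in range(10):    # bounded retries; the limit is arbitrary
    try:
        uow = worker.insert_uow(process_name, start_time, end_time, iteration, time_record)
        break
    except DuplicateKeyError:
        continue               # collision: bump the synthetic end_id and try again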