def _callback_reprocess(self, process_name, timestamp, tree_node):
    """ Tree callback answering a reprocessing request.
    The timetable record may be transferred to STATE_IN_PROGRESS with no
    related unit_of_work attached to it. """
    time_record = tree_node.time_record
    uow_id = time_record.get_related_unit_of_work()

    if uow_id is None:
        # nothing to invalidate - simply reset the time-record to embryo state
        time_record.set_state(TimeTableEntry.STATE_EMBRYO)
        msg = 'Transferred time-record %s in timeperiod %s to %s;' \
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state())
    else:
        # invalidate the attached unit_of_work so it will be re-posted
        time_record.set_state(TimeTableEntry.STATE_IN_PROGRESS)
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
        uow_obj.set_state(uow_obj.STATE_INVALID)
        uow_obj.set_number_of_retries(0)
        uow_obj.set_created_at(datetime.utcnow())
        unit_of_work_helper.update(self.logger, uow_obj)
        msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s' \
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state(),
                 uow_obj.get_state())

    time_record.set_number_of_failures(0)
    self._save_time_record(process_name, time_record)
    self.logger.warning(msg)
    tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

    # register the node so the scheduler knows this timeperiod is being reprocessed
    per_process = self.reprocess.setdefault(process_name, dict())
    per_process[timestamp] = tree_node
def update_scope_of_processing(self, process_name, unit_of_work, start_time, end_time, time_record):
    """ Refines the slice's upper bound for processing by locating the newest
    document in the source collection within [start_time : end_time). """
    collection_name = unit_of_work.get_source_collection()
    collection = CollectionContext.get_collection(self.logger, collection_name)

    # newest _id inside the timeperiod becomes the new end of the processing range.
    # NOTE(review): indexing [0] raises IndexError when the range holds no documents -
    # presumably callers guarantee a non-empty slice; confirm before relying on it
    query = {AbstractModel.TIMESTAMP: {'$gte': start_time, '$lt': end_time}}
    cursor = collection.find(spec=query, fields='_id').sort('_id', DESCENDING).limit(1)
    last_object_id = cursor[0]['_id']

    unit_of_work.set_end_id(str(last_object_id))
    unit_of_work_helper.update(self.logger, unit_of_work)

    msg = 'Updated range to process for %s in timeperiod %s for collection %s: [%s : %s]' \
          % (process_name,
             time_record.get_timestamp(),
             collection_name,
             unit_of_work.get_start_id(),
             str(last_object_id))
    self._log_message(INFO, process_name, time_record, msg)
def _mq_callback(self, message):
    """ try/except wrapper around message processing:
    - catches the exception
    - logs the exception
    - marks unit of work as INVALID """
    unit_of_work = None
    try:
        # message body carries the ObjectId of the unit_of_work posted to mq
        unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(message.body))
        state = unit_of_work.get_state()
        if state in (UnitOfWorkEntry.STATE_CANCELED, UnitOfWorkEntry.STATE_PROCESSED):
            # garbage collector might have reposted this UOW
            self.logger.warning('Skipping unit_of_work: id %s; state %s;'
                                % (str(message.body), state), exc_info=False)
            self.consumer.acknowledge(message.delivery_tag)
            return
    except Exception:
        self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body),
                          exc_info=True)
        self.consumer.acknowledge(message.delivery_tag)
        return

    try:
        self.performance_ticker.start_uow(unit_of_work)
        # mark the unit_of_work as processed with zero documents
        unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
        unit_of_work.set_number_of_processed_documents(0)
        unit_of_work.set_started_at(datetime.utcnow())
        unit_of_work.set_finished_at(datetime.utcnow())
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.finish_uow()
    except Exception as e:
        # any failure invalidates the unit_of_work so the garbage collector can repost it
        unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.cancel_uow()
        self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'
                          % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
    finally:
        self.consumer.acknowledge(message.delivery_tag)
        self.consumer.close()
def _process_single_document(self, document):
    """ Inspects a single UOW document retrieved from the database and decides
    whether it must be reposted, canceled, or left untouched. """
    unit_of_work = UnitOfWorkEntry(document)
    process_name = unit_of_work.get_process_name()
    state = unit_of_work.get_state()
    document_id = str(document["_id"])

    repost = (state == UnitOfWorkEntry.STATE_INVALID)
    if not repost and state in (UnitOfWorkEntry.STATE_IN_PROGRESS,
                                UnitOfWorkEntry.STATE_REQUESTED):
        # stale units of work are reposted after REPOST_AFTER_HOURS of inactivity
        last_activity = unit_of_work.get_started_at()
        if last_activity is None:
            last_activity = unit_of_work.get_created_at()
        repost = datetime.utcnow() - last_activity > timedelta(hours=REPOST_AFTER_HOURS)

    if not repost:
        return

    age = datetime.utcnow() - unit_of_work.get_created_at()
    if age < timedelta(hours=LIFE_SUPPORT_HOURS):
        # still within life support - bump the retry counter and republish
        unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
        unit_of_work.set_number_of_retries(unit_of_work.get_number_of_retries() + 1)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.publishers.get_publisher(process_name).publish(document_id)
        self.logger.info("UOW marked for re-processing: process %s; id %s; attempt %d"
                         % (process_name, document_id, unit_of_work.get_number_of_retries()))
        self.performance_ticker.increment()
    else:
        # too old to retry - cancel it
        unit_of_work.set_state(UnitOfWorkEntry.STATE_CANCELED)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.logger.info("UOW transfered to STATE_CANCELED: process %s; id %s; attempt %d"
                         % (process_name, document_id, unit_of_work.get_number_of_retries()))
def _callback_skip(self, process_name, timestamp, tree_node):
    """ Tree callback answering a skip request: marks the time-record as skipped
    and cancels its unit_of_work, if one is attached. """
    time_record = tree_node.time_record
    time_record.set_state(TimeTableEntry.STATE_SKIPPED)
    uow_id = time_record.get_related_unit_of_work()

    if uow_id is None:
        msg = 'Transferred time-record %s in timeperiod %s to %s;' \
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state())
    else:
        # cancel the attached unit_of_work so workers drop it
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
        uow_obj.set_state(uow_obj.STATE_CANCELED)
        unit_of_work_helper.update(self.logger, uow_obj)
        msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s' \
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state(),
                 uow_obj.get_state())

    self._save_time_record(process_name, time_record)
    self.logger.warning(msg)
    tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

    # the skipped timeperiod no longer needs reprocessing
    if process_name in self.reprocess and timestamp in self.reprocess[process_name]:
        del self.reprocess[process_name][timestamp]
def _mq_callback(self, message):
    """ try/except wrapper in case exception breaks the abstract method, this method:
    - catches the exception
    - logs the exception
    - marks unit of work as INVALID

    Reads the unit_of_work referenced by the mq message body, pages through the
    source collection in bulk_threshold-sized chunks, and on success records the
    aggregation counters and flips the unit_of_work to STATE_PROCESSED. """
    unit_of_work = None
    try:
        # @param object_id: ObjectId of the unit_of_work from mq
        object_id = ObjectId(message.body)
        unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
        if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
            # garbage collector might have reposted this UOW
            self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                % (str(message.body), unit_of_work.get_state()), exc_info=False)
            self.consumer.acknowledge(message.delivery_tag)
            return
    except Exception:
        # unresolvable message: ack it so it is not redelivered forever
        self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body),
                          exc_info=True)
        self.consumer.acknowledge(message.delivery_tag)
        return

    try:
        # processing range boundaries recorded in the unit_of_work
        start_id_obj = ObjectId(unit_of_work.get_start_id())
        end_id_obj = ObjectId(unit_of_work.get_end_id())
        start_timestamp = unit_of_work.get_start_timestamp()
        end_timestamp = unit_of_work.get_end_timestamp()

        unit_of_work.set_state(UnitOfWorkEntry.STATE_IN_PROGRESS)
        unit_of_work.set_started_at(datetime.utcnow())
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.start_uow(unit_of_work)

        # page through [start_id_obj : end_id_obj] in chunks of bulk_threshold;
        # each iteration advances start_id_obj to the last _id already consumed
        bulk_threshold = settings['bulk_threshold']
        iteration = 0
        while True:
            source_collection = self._get_source_collection()
            if iteration == 0:
                # first chunk includes the start boundary itself
                queue = { '_id' : { '$gte' : start_id_obj, '$lte' : end_id_obj } }
            else:
                # subsequent chunks exclude the already-processed start _id
                queue = { '_id' : { '$gt' : start_id_obj, '$lte' : end_id_obj } }

            if start_timestamp is not None and end_timestamp is not None:
                # remove all accident objects that may be in [start_id_obj : end_id_obj] range
                queue[AbstractModel.TIMESTAMP] = { '$gte' : start_timestamp, '$lt' : end_timestamp }

            # NOTE(review): cursor.count(with_limit_and_skip=True) is legacy pymongo
            # API - confirm the project pins a pymongo version that still provides it
            cursor = source_collection.find(queue).sort('_id', ASCENDING).limit(bulk_threshold)
            count = cursor.count(with_limit_and_skip=True)
            if count == 0 and iteration == 0:
                # empty range: nothing to aggregate at all
                msg = 'No entries in %s at range [%s : %s]' \
                      % (str(source_collection.name), unit_of_work.get_start_id(),
                         unit_of_work.get_end_id())
                self.logger.warning(msg)
                break
            else:
                shall_continue, new_start_id = self._process_not_empty_cursor(cursor)
                if shall_continue:
                    start_id_obj = new_start_id
                    iteration += 1
                else:
                    break

        msg = 'Cursor exploited after %s iterations' % str(iteration)
        self.logger.info(msg)

        # flush aggregation results and record counters on the unit_of_work
        self.perform_post_processing(unit_of_work.get_timestamp())
        number_of_aggregated_objects = self._flush_aggregated_objects()
        unit_of_work.set_number_of_aggregated_documents(number_of_aggregated_objects)
        unit_of_work.set_number_of_processed_documents(self.performance_ticker.posts_per_job)
        unit_of_work.set_finished_at(datetime.utcnow())
        unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.finish_uow()
    except Exception as e:
        # mark INVALID so the garbage collector can repost this unit_of_work
        unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.cancel_uow()

        # drop partial aggregation state to release memory after a failed run
        del self.aggregated_objects
        self.aggregated_objects = dict()
        gc.collect()

        self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r' \
                          % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
    finally:
        # NOTE(review): consumer is closed after every message - presumably each
        # worker handles exactly one UOW per connection; confirm this is intended
        self.consumer.acknowledge(message.delivery_tag)
        self.consumer.close()