class AbstractUowAwareWorker(AbstractMqWorker):
    """ Abstract class is inherited by all workers/aggregators
        that are aware of unit_of_work and capable of processing it """

    def __init__(self, process_name, perform_db_logging=False):
        super(AbstractUowAwareWorker, self).__init__(process_name)
        self.perform_db_logging = perform_db_logging
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.mq_transmitter = MqTransmitter(self.logger)

    def __del__(self):
        del self.mq_transmitter
        super(AbstractUowAwareWorker, self).__del__()

    # **************** Abstract Methods ************************
    def _init_performance_tracker(self, logger):
        self.performance_tracker = UowAwareTracker(logger)
        self.performance_tracker.start()

    def _process_uow(self, uow):
        """
        :param uow: unit_of_work to process
        :return: a tuple (number of processed items/documents/etc, desired unit_of_work state) or None
            if None is returned, the return tuple is assumed to be (0, unit_of_work.STATE_PROCESSED)
        :raise an Exception if the UOW shall be marked as STATE_INVALID
        """
        raise NotImplementedError('method _process_uow must be implemented by {0}'.format(self.__class__.__name__))

    def _clean_up(self):
        """ method is called from the *finally* clause and is supposed to clean up after the uow processing """
        pass

    def _mq_callback(self, message):
        try:
            mq_request = MqTransmission.from_json(message.body)
            uow = self.uow_dao.get_one(mq_request.record_db_id)
            if not uow.is_requested:
                # accept only UOW in STATE_REQUESTED
                self.logger.warning('Skipping UOW: id {0}; state {1};'.format(message.body, uow.state),
                                    exc_info=False)
                self.consumer.acknowledge(message.delivery_tag)
                return
        except Exception:
            self.logger.error('Safety fuse. Can not identify UOW {0}'.format(message.body), exc_info=True)
            self.consumer.acknowledge(message.delivery_tag)
            return

        db_log_handler = UowLogHandler(self.logger, uow.db_id)
        try:
            uow.state = unit_of_work.STATE_IN_PROGRESS
            uow.started_at = datetime.utcnow()
            self.uow_dao.update(uow)
            self.performance_tracker.start_uow(uow)

            if self.perform_db_logging:
                db_log_handler.attach()

            result = self._process_uow(uow)
            if result is None:
                self.logger.warning('method {0}._process_uow returned None. Assuming happy flow.'
                                    .format(self.__class__.__name__))
                number_of_aggregated_objects, target_state = 0, unit_of_work.STATE_PROCESSED
            else:
                number_of_aggregated_objects, target_state = result

            uow.number_of_aggregated_documents = number_of_aggregated_objects
            uow.number_of_processed_documents = self.performance_tracker.success_per_job
            uow.finished_at = datetime.utcnow()
            uow.state = target_state
            self.uow_dao.update(uow)

            if uow.is_finished:
                self.performance_tracker.finish_uow()
            else:
                self.performance_tracker.cancel_uow()

        except Exception as e:
            fresh_uow = self.uow_dao.get_one(mq_request.record_db_id)
            self.performance_tracker.cancel_uow()
            if fresh_uow.is_canceled:
                self.logger.warning('UOW {0} for {1}@{2} was likely marked by MX as SKIPPED. '
                                    'No UOW update performed.'
                                    .format(uow.db_id, uow.process_name, uow.timeperiod),
                                    exc_info=False)
            else:
                self.logger.error('Safety fuse while processing UOW {0} for {1}@{2}: {3}'
                                  .format(uow.db_id, uow.process_name, uow.timeperiod, e), exc_info=True)
                uow.state = unit_of_work.STATE_INVALID
                self.uow_dao.update(uow)

        finally:
            self.consumer.acknowledge(message.delivery_tag)
            self.consumer.close()
            self._clean_up()
            db_log_handler.detach()

        try:
            self.mq_transmitter.publish_uow_status(uow)
            self.logger.info('UOW *{0}* status report published into MQ'.format(uow.state))
        except Exception:
            self.logger.error('Error on UOW status report publishing', exc_info=True)
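# Illustrative sketch, not part of the framework: a minimal subclass of
# AbstractUowAwareWorker following the _process_uow contract documented above.
# `fetch_documents` is a hypothetical data-access helper used for illustration only;
# returning None instead would trigger the (0, STATE_PROCESSED) happy flow,
# and raising an exception would cause the UOW to be marked STATE_INVALID.
class SampleAggregatorWorker(AbstractUowAwareWorker):
    def _process_uow(self, uow):
        # process every document in the UOW's timeperiod range
        documents = fetch_documents(uow.start_timeperiod, uow.end_timeperiod)
        return len(documents), unit_of_work.STATE_PROCESSED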
class BashRunnable(threading.Thread):
    """ Process starts a remote or local bash script job, supervises its execution and updates the MQ """

    def __init__(self, logger, message, consumer, performance_ticker):
        self.logger = logger
        self.message = message
        self.mq_request = SynergyMqTransmission.from_json(message.body)
        self.consumer = consumer
        self.performance_ticker = performance_ticker
        self.alive = False
        self.return_code = -1
        self.uow_dao = UnitOfWorkDao(self.logger)

        self.thread_name = str(self.mq_request)
        super(BashRunnable, self).__init__(name=self.thread_name)

    def _poll_process(self):
        return self.alive, self.return_code

    def _start_process(self):
        try:
            uow = self.uow_dao.get_one(self.mq_request.unit_of_work_id)
            if not uow.is_requested:
                # accept only UOW in STATE_REQUESTED
                self.logger.warn('Skipping unit_of_work: id %s; state %s;' % (str(self.message.body), uow.state),
                                 exc_info=False)
                self.consumer.acknowledge(self.message.delivery_tag)
                return
        except Exception:
            self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(self.message.body),
                              exc_info=True)
            self.consumer.acknowledge(self.message.delivery_tag)
            return

        try:
            self.logger.info('start: %s {' % self.thread_name)
            self.alive = True

            uow.state = unit_of_work.STATE_IN_PROGRESS
            uow.started_at = datetime.utcnow()
            self.uow_dao.update(uow)

            fabric.operations.env.warn_only = True
            fabric.operations.env.abort_on_prompts = True
            fabric.operations.env.use_ssh_config = True
            fabric.operations.env.host_string = uow.arguments[ARGUMENT_CMD_HOST]

            command = os.path.join(uow.arguments[ARGUMENT_CMD_PATH], uow.arguments[ARGUMENT_CMD_FILE])
            command += ' %s' % uow.arguments[ARGUMENT_CMD_ARGS]

            run_result = fabric.operations.run(command, pty=False)
            if run_result.succeeded:
                self.return_code = 0

            uow.finished_at = datetime.utcnow()
            uow.state = unit_of_work.STATE_PROCESSED
            self.uow_dao.update(uow)
            self.logger.info('Completed %s with result = %r' % (self.thread_name, self.return_code))
        except Exception:
            self.logger.error('Exception on starting: %s' % self.thread_name, exc_info=True)
            uow.state = unit_of_work.STATE_INVALID
            self.uow_dao.update(uow)
        finally:
            self.logger.info('}')
            self.alive = False

    def run(self):
        try:
            self._start_process()
            code = None
            alive = True
            while alive:
                alive, code = self._poll_process()
                time.sleep(0.1)

            if code == 0:
                self.performance_ticker.tracker.increment_success()
            else:
                self.performance_ticker.tracker.increment_failure()

            self.logger.info('BashDriver for %s return code is %r' % (self.thread_name, code))
        except Exception as e:
            self.performance_ticker.tracker.increment_failure()
            self.logger.error('Safety fuse while processing request %r: %r' % (self.message.body, e),
                              exc_info=True)
        finally:
            self.consumer.acknowledge(self.message.delivery_tag)
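# Illustrative usage sketch (an assumption, not framework code): a BashDriver-style
# worker would typically spawn one BashRunnable per MQ message, roughly:
#
#   runnable = BashRunnable(self.logger, message, self.consumer, self.performance_ticker)
#   runnable.daemon = True
#   runnable.start()
#
# The thread then acknowledges the message itself in its *finally* clause.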
class AbstractStateMachine(object):
    """ Abstract state machine hosts logic common to the rest of the registered State Machines """

    def __init__(self, logger, timetable, name):
        self.name = name
        self.logger = logger
        self.mq_transmitter = MqTransmitter(self.logger)
        self.timetable = timetable
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.job_dao = JobDao(self.logger)

    @property
    def run_on_active_timeperiod(self):
        """
        :return: True if the given State Machine allows execution on the *live* timeperiod,
            as opposed to the finished one
        """
        raise NotImplementedError('property run_on_active_timeperiod must be implemented by {0}'
                                  .format(self.__class__.__name__))

    def _log_message(self, level, process_name, timeperiod, msg):
        """ method performs logging into the log file and the Timetable's tree node """
        self.timetable.add_log_entry(process_name, timeperiod, msg)
        self.logger.log(level, msg)

    @with_reconnect
    def _insert_uow(self, process_name, timeperiod, start_timeperiod, end_timeperiod, start_id, end_id):
        """ creates a unit_of_work and inserts it into the DB
            :raise DuplicateKeyError: if a unit_of_work with the given parameters already exists """
        uow = UnitOfWork()
        uow.process_name = process_name
        uow.timeperiod = timeperiod
        uow.start_id = str(start_id)
        uow.end_id = str(end_id)
        uow.start_timeperiod = start_timeperiod
        uow.end_timeperiod = end_timeperiod
        uow.created_at = datetime.utcnow()
        uow.submitted_at = datetime.utcnow()
        uow.source = context.process_context[process_name].source
        uow.sink = context.process_context[process_name].sink
        uow.state = unit_of_work.STATE_REQUESTED
        uow.unit_of_work_type = unit_of_work.TYPE_MANAGED
        uow.number_of_retries = 0
        uow.arguments = context.process_context[process_name].arguments
        uow.db_id = self.uow_dao.insert(uow)

        msg = 'Created: UOW {0} for {1}@{2}.'.format(uow.db_id, process_name, start_timeperiod)
        self._log_message(INFO, process_name, start_timeperiod, msg)
        return uow

    def _publish_uow(self, uow):
        self.mq_transmitter.publish_managed_uow(uow)
        msg = 'Published: UOW {0} for {1}@{2}.'.format(uow.db_id, uow.process_name, uow.start_timeperiod)
        self._log_message(INFO, uow.process_name, uow.start_timeperiod, msg)

    def insert_and_publish_uow(self, job_record, start_id, end_id):
        """ method creates and publishes a unit_of_work. It also handles DuplicateKeyError and attempts recovery
        :return: tuple (uow, is_duplicate)
        :raise UserWarning: if the recovery from DuplicateKeyError was unsuccessful
        """
        process_name = job_record.process_name
        timeperiod = job_record.timeperiod
        start_timeperiod = self.compute_start_timeperiod(job_record.process_name, job_record.timeperiod)
        end_timeperiod = self.compute_end_timeperiod(job_record.process_name, job_record.timeperiod)

        try:
            is_duplicate = False
            uow = self._insert_uow(process_name, timeperiod, start_timeperiod, end_timeperiod, start_id, end_id)
        except DuplicateKeyError as e:
            is_duplicate = True
            msg = 'Catching up with latest UOW {0}@{1}, because of: {2}' \
                  .format(process_name, start_timeperiod, e)
            self._log_message(WARNING, process_name, start_timeperiod, msg)
            uow = self.uow_dao.recover_from_duplicatekeyerror(e)

        if not uow:
            msg = 'PERSISTENT TIER ERROR! Unable to locate UOW for {0}@{1}' \
                  .format(process_name, start_timeperiod)
            self._log_message(WARNING, process_name, start_timeperiod, msg)
            raise UserWarning(msg)

        if uow.is_canceled:
            # this UOW was marked for re-processing. recycle it
            uow.created_at = datetime.utcnow()      # reset created_at to bypass GC cancellation logic
            uow.submitted_at = datetime.utcnow()    # reset submitted_at to allow 1 hour free of GC resubmitting
            del uow.started_at
            del uow.finished_at
            del uow.number_of_aggregated_documents
            del uow.number_of_processed_documents
            uow.state = unit_of_work.STATE_REQUESTED
            self.uow_dao.update(uow)

        # publish the created/recovered/recycled unit_of_work
        self._publish_uow(uow)
        return uow, is_duplicate

    def notify(self, uow):
        """ method is used by StateMachine's users to provide notifications about changes in the UOW
            if applicable, method will update the job_record state and the Timetable tree node state
            :assumptions: uow is in [STATE_NOOP, STATE_CANCELED, STATE_PROCESSED] """
        pass

    def compute_start_timeperiod(self, process_name, timeperiod):
        """ computes the lowest *inclusive* timeperiod boundary for the job to process
            for a process with time_grouping == 1, it returns the given timeperiod unchanged
            for a process with time_grouping != 1, it computes the first timeperiod
            not processed by the previous job run
            For instance: with time_grouping=3, QUALIFIER_HOURLY, and timeperiod=2016042018,
            the start_timeperiod will be 2016042016 (computed as 2016042018 - 3 + 1)
        """
        time_grouping = context.process_context[process_name].time_grouping
        if time_grouping == 1:
            return timeperiod

        # step 1: translate the given timeperiod to the time-grouped one
        process_hierarchy = self.timetable.get_tree(process_name).process_hierarchy
        timeperiod_dict = process_hierarchy[process_name].timeperiod_dict
        translated_timeperiod = timeperiod_dict._translate_timeperiod(timeperiod)

        # step 2: compute the previous grouped period
        # NOTICE: simple `time_helper.increment_timeperiod(time_qualifier, timeperiod)` is insufficient
        # as it does not address edge cases, such as the last day of the month or the last hour of the day
        # For instance: with time_grouping=3, QUALIFIER_DAILY, and 2016123100
        # the `increment_timeperiod` will yield 2016122800 instead of 2016123100
        time_qualifier = context.process_context[process_name].time_qualifier
        for i in range(1, time_grouping + 1):
            prev_timeperiod = time_helper.increment_timeperiod(time_qualifier, translated_timeperiod, delta=-i)
            if prev_timeperiod == timeperiod_dict._translate_timeperiod(prev_timeperiod):
                # prev_timeperiod is currently at the last grouped timeperiod
                break

        # step 3: compute the first exclusive timeperiod after the *prev_timeperiod*,
        # which becomes the first inclusive timeperiod for this job run
        over_the_edge_timeperiod = time_helper.increment_timeperiod(time_qualifier, prev_timeperiod, delta=-1)
        if prev_timeperiod != timeperiod_dict._translate_timeperiod(over_the_edge_timeperiod):
            # over_the_edge_timeperiod fell into the previous day, month or year
            # *prev_timeperiod* points to the first month, first day of the month or the 00 hour
            start_timeperiod = prev_timeperiod
        else:
            start_timeperiod = self.compute_end_timeperiod(process_name, prev_timeperiod)

        return start_timeperiod

    def compute_end_timeperiod(self, process_name, timeperiod):
        """ computes the first *exclusive* timeperiod for the job to process """
        time_qualifier = context.process_context[process_name].time_qualifier
        end_timeperiod = time_helper.increment_timeperiod(time_qualifier, timeperiod)
        return end_timeperiod

    def _is_noop_timeperiod(self, process_name, timeperiod):
        """ method verifies whether the given timeperiod for the given process is valid
            or falls in-between grouping checkpoints
        :param process_name: name of the process
        :param timeperiod: timeperiod to verify
        :return: False, if the given process has no time_grouping set or it is equal to 1
                 False, if time_grouping is custom but the given timeperiod matches the grouped timeperiod
                 True, if the timeperiod falls in-between grouping cracks
        """
        time_grouping = context.process_context[process_name].time_grouping
        if time_grouping == 1:
            return False

        process_hierarchy = self.timetable.get_tree(process_name).process_hierarchy
        timeperiod_dict = process_hierarchy[process_name].timeperiod_dict
        return timeperiod_dict._translate_timeperiod(timeperiod) != timeperiod

    def _process_noop_timeperiod(self, job_record):
        """ method is valid for processes having time_grouping != 1.
            should a job record fall in-between grouped time milestones,
            its state is set to STATE_NOOP without any processing """
        job_record.state = job.STATE_NOOP
        self.job_dao.update(job_record)

        time_grouping = context.process_context[job_record.process_name].time_grouping
        msg = 'Job {0}@{1} with time_grouping {2} was transferred to STATE_NOOP' \
              .format(job_record.process_name, job_record.timeperiod, time_grouping)
        self._log_message(INFO, job_record.process_name, job_record.timeperiod, msg)

    def _process_state_embryo(self, job_record):
        """ method processes job records in STATE_EMBRYO state """
        pass

    def _process_state_in_progress(self, job_record):
        """ method processes job records in STATE_IN_PROGRESS state """
        pass

    def _process_state_final_run(self, job_record):
        """ method processes job records in STATE_FINAL_RUN state """
        uow = self.uow_dao.get_one(job_record.related_unit_of_work)
        if uow.is_processed:
            self.update_job(job_record, uow, job.STATE_PROCESSED)
        elif uow.is_noop:
            self.update_job(job_record, uow, job.STATE_NOOP)
        elif uow.is_canceled:
            self.update_job(job_record, uow, job.STATE_SKIPPED)
        elif uow.is_invalid:
            msg = 'Job {0}: UOW for {1}@{2} is in {3}; ' \
                  'relying on the Garbage Collector to either recycle or cancel the UOW.' \
                  .format(job_record.db_id, job_record.process_name, job_record.timeperiod, uow.state)
            self._log_message(INFO, job_record.process_name, job_record.timeperiod, msg)
        else:
            msg = 'Suppressed creating UOW for {0}@{1}; Job is in {2}; UOW is in {3}' \
                  .format(job_record.process_name, job_record.timeperiod, job_record.state, uow.state)
            self._log_message(INFO, job_record.process_name, job_record.timeperiod, msg)

        timetable_tree = self.timetable.get_tree(job_record.process_name)
        timetable_tree.build_tree()

    def _process_terminal_state(self, job_record):
        """ method logs a warning message notifying that the job is no longer governed by this state machine """
        msg = 'Job {0} for {1}@{2} is in the terminal state {3}, ' \
              'and is no longer governed by the State Machine {4}' \
              .format(job_record.db_id, job_record.process_name, job_record.timeperiod,
                      job_record.state, self.name)
        self._log_message(WARNING, job_record.process_name, job_record.timeperiod, msg)

    def manage_job_with_blocking_children(self, job_record):
        """ method triggers job processing only if:
            - all children are finished (STATE_PROCESSED, STATE_SKIPPED, STATE_NOOP)
            - all external dependencies are finalized (STATE_PROCESSED, STATE_SKIPPED, STATE_NOOP) """
        is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
        if is_job_finalizable:
            self.manage_job(job_record)
        else:
            msg = 'Job {0}@{1} is blocked by unprocessed children/dependencies. Waiting another tick' \
                  .format(job_record.process_name, job_record.timeperiod)
            self._log_message(INFO, job_record.process_name, job_record.timeperiod, msg)

    def manage_job_with_blocking_dependencies(self, job_record):
        """ method triggers job processing only if:
            - all dependencies are in [STATE_PROCESSED, STATE_NOOP]
            NOTICE: method transfers the current job into STATE_SKIPPED
            if any dependency is in STATE_SKIPPED """
        composite_state = self.timetable.dependent_on_composite_state(job_record)
        assert isinstance(composite_state, NodesCompositeState)

        if composite_state.all_processed:
            self.manage_job(job_record)
        elif composite_state.skipped_present:
            # As soon as any of the <dependent on> periods is in STATE_SKIPPED
            # there is very little sense in waiting for it to become STATE_PROCESSED
            # Skip this timeperiod itself
            job_record.state = job.STATE_SKIPPED
            self.job_dao.update(job_record)
            self.mq_transmitter.publish_job_status(job_record)

            msg = 'Job {0}@{1} is blocked by STATE_SKIPPED dependencies. ' \
                  'Transferred the job to STATE_SKIPPED'.format(job_record.process_name, job_record.timeperiod)
            self._log_message(WARNING, job_record.process_name, job_record.timeperiod, msg)
        else:
            msg = 'Job {0}@{1} is blocked by unprocessed dependencies. Waiting another tick' \
                  .format(job_record.process_name, job_record.timeperiod)
            self._log_message(INFO, job_record.process_name, job_record.timeperiod, msg)

    def manage_job(self, job_record):
        """ method's main duty is to _avoid_ publishing another unit_of_work
            if the previous one was not yet processed
            In case the State Machine sees that the unit_of_work already exists it can either:
            - update the boundaries of the processing and republish
            - wait another tick and idle """
        assert isinstance(job_record, Job)
        try:
            if self._is_noop_timeperiod(job_record.process_name, job_record.timeperiod):
                self._process_noop_timeperiod(job_record)
                return

            if job_record.is_embryo:
                self._process_state_embryo(job_record)

            elif job_record.is_in_progress:
                self._process_state_in_progress(job_record)

            elif job_record.is_final_run:
                self._process_state_final_run(job_record)

            elif job_record.is_skipped:
                self._process_terminal_state(job_record)

            elif job_record.is_processed:
                self._process_terminal_state(job_record)

            elif job_record.is_noop:
                self._process_terminal_state(job_record)

            else:
                msg = 'Unknown state {0} of the job {1}'.format(job_record.state, job_record.db_id)
                self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)

        except LookupError as e:
            job_record.number_of_failures += 1
            self.job_dao.update(job_record)
            self.timetable.skip_if_needed(job_record)
            msg = 'Increasing fail counter for Job {0}@{1}, because of: {2}' \
                  .format(job_record.process_name, job_record.timeperiod, e)
            self._log_message(WARNING, job_record.process_name, job_record.timeperiod, msg)

        finally:
            self.mq_transmitter.publish_job_status(job_record)

    def reprocess_job(self, job_record):
        """ method marks the given job for reprocessing:
            - if no UOW was bound to the job record - it is transferred to STATE_EMBRYO only
            - if the related UOW was not finished - the UOW is marked as STATE_CANCELED
              and the job record is passed thru the _process_state_in_progress routine
            - otherwise the job record is passed thru the _process_state_in_progress routine """
        original_job_state = job_record.state
        job_record.number_of_failures = 0

        if not job_record.related_unit_of_work:
            job_record.state = job.STATE_EMBRYO
            self.job_dao.update(job_record)
        else:
            uow = self.uow_dao.get_one(job_record.related_unit_of_work)
            if not uow.is_finished:
                uow.state = unit_of_work.STATE_CANCELED
                uow.submitted_at = datetime.utcnow()
                self.uow_dao.update(uow)
            self._process_state_in_progress(job_record)

        msg = 'Reprocessed Job {0} for {1}@{2}: state transfer {3} -> {4};' \
              .format(job_record.db_id, job_record.process_name, job_record.timeperiod,
                      original_job_state, job_record.state)
        self._log_message(WARNING, job_record.process_name, job_record.timeperiod, msg)

    def skip_job(self, job_record):
        """ method transfers:
            - the given job into STATE_SKIPPED if it is not in a finished state
            - the related UOW into STATE_CANCELED if it is not in a finished state """
        original_job_state = job_record.state

        if not job_record.is_finished:
            job_record.state = job.STATE_SKIPPED
            self.job_dao.update(job_record)

        if job_record.related_unit_of_work:
            uow = self.uow_dao.get_one(job_record.related_unit_of_work)
            if not uow.is_finished:
                uow.state = unit_of_work.STATE_CANCELED
                uow.submitted_at = datetime.utcnow()
                self.uow_dao.update(uow)

        msg = 'Skipped Job {0} for {1}@{2}: state transfer {3} -> {4};' \
              .format(job_record.db_id, job_record.process_name, job_record.timeperiod,
                      original_job_state, job_record.state)
        self._log_message(WARNING, job_record.process_name, job_record.timeperiod, msg)

    def create_job(self, process_name, timeperiod):
        """ method creates a job record in STATE_EMBRYO for the given process_name and timeperiod
            :returns: the created job record of type <Job> """
        job_record = Job()
        job_record.state = job.STATE_EMBRYO
        job_record.timeperiod = timeperiod
        job_record.process_name = process_name
        self.job_dao.update(job_record)

        self.logger.info('Created Job {0} for {1}@{2}'
                         .format(job_record.db_id, job_record.process_name, job_record.timeperiod))
        return job_record

    def update_job(self, job_record, uow, new_state):
        """ method updates the job record with a new unit_of_work and a new state """
        original_job_state = job_record.state
        job_record.state = new_state
        job_record.related_unit_of_work = uow.db_id
        self.job_dao.update(job_record)

        msg = 'Updated Job {0} for {1}@{2}: state transfer {3} -> {4};' \
              .format(job_record.db_id, job_record.process_name, job_record.timeperiod,
                      original_job_state, new_state)
        self._log_message(INFO, job_record.process_name, job_record.timeperiod, msg)
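# Illustrative sketch, not one of the registered State Machines: a minimal concrete
# subclass wiring insert_and_publish_uow into the EMBRYO state. The start_id/end_id
# values of 0 are placeholders for range-less processing.
class SampleStateMachine(AbstractStateMachine):
    @property
    def run_on_active_timeperiod(self):
        # this sample machine works on finished timeperiods only
        return False

    def _process_state_embryo(self, job_record):
        # create and publish a UOW covering the whole timeperiod,
        # then move the job into STATE_IN_PROGRESS
        uow, is_duplicate = self.insert_and_publish_uow(job_record, start_id=0, end_id=0)
        self.update_job(job_record, uow, job.STATE_IN_PROGRESS)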
class Timetable(object):
    """ Timetable holds all known process trees, where every node presents a timeperiod-driven job """

    def __init__(self, logger):
        self.lock = RLock()
        self.logger = logger
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.job_dao = JobDao(self.logger)
        self.reprocess = dict()

        # self.trees contains all of the trees and manages much of their life cycle
        # remember to enlist here all the trees the system is working with
        self.trees = self._construct_trees_from_context()

        self._register_callbacks()
        self._register_dependencies()
        self.load_tree()
        self.build_trees()
        self.validate()

    def _construct_trees_from_context(self):
        trees = dict()
        for tree_name, context_entry in context.timetable_context.items():
            tree = MultiLevelTree(process_names=context_entry.enclosed_processes,
                                  tree_name=tree_name,
                                  mx_name=context_entry.mx_name,
                                  mx_page=context_entry.mx_page)
            trees[tree_name] = tree
        return trees

    def _register_dependencies(self):
        """ register dependencies between trees """
        for tree_name, context_entry in context.timetable_context.items():
            tree = self.trees[tree_name]
            assert isinstance(tree, MultiLevelTree)
            for dependent_on in context_entry.dependent_on:
                dependent_on_tree = self.trees[dependent_on]
                assert isinstance(dependent_on_tree, MultiLevelTree)
                tree.register_dependent_on(dependent_on_tree)

    def _register_callbacks(self):
        """ register logic that reacts to the reprocessing request
            and the create-embryo-timetable-record request """
        # reprocessing request
        for tree_name, tree in self.trees.items():
            tree.register_reprocess_callback(self._callback_reprocess)

        # skip request
        for tree_name, tree in self.trees.items():
            tree.register_skip_callback(self._callback_skip)

        # callbacks register
        for tree_name, tree in self.trees.items():
            tree.register_create_callbacks(self._callback_create_job_record)

    # *** Call-back methods ***
    def _find_dependant_trees(self, tree_obj):
        """ returns a list of trees that are dependent_on the given tree_obj """
        dependant_trees = []
        for tree_name, tree in self.trees.items():
            if tree_obj in tree.dependent_on:
                dependant_trees.append(tree)
        return dependant_trees

    def _find_dependant_tree_nodes(self, node_a):
        dependant_nodes = set()
        for tree_b in self._find_dependant_trees(node_a.tree):
            node_b = node_a.find_counterpart_in(tree_b)
            if node_b is None:
                continue
            dependant_nodes.add(node_b)
        return dependant_nodes

    def _reprocess_single_tree_node(self, tree_node):
        """ is called from the tree to answer a reprocessing request.
            It is possible that the timetable record will be transferred to STATE_IN_PROGRESS
            with no related unit_of_work """
        uow_id = tree_node.job_record.related_unit_of_work
        if uow_id is not None:
            tree_node.job_record.state = job.STATE_IN_PROGRESS
            uow = self.uow_dao.get_one(uow_id)
            uow.state = unit_of_work.STATE_INVALID
            uow.number_of_retries = 0
            uow.created_at = datetime.utcnow()
            self.uow_dao.update(uow)
            msg = 'Transferred job record %s for %s in timeperiod %s to %s; Transferred unit_of_work to %s' \
                  % (tree_node.job_record.db_id, tree_node.process_name, tree_node.job_record.timeperiod,
                     tree_node.job_record.state, uow.state)

            if tree_node.process_name not in self.reprocess:
                self.reprocess[tree_node.process_name] = dict()
            self.reprocess[tree_node.process_name][tree_node.timeperiod] = tree_node
        else:
            tree_node.job_record.state = job.STATE_EMBRYO
            msg = 'Transferred job record %s for %s in timeperiod %s to %s;' \
                  % (tree_node.job_record.db_id, tree_node.process_name, tree_node.job_record.timeperiod,
                     tree_node.job_record.state)

        tree_node.job_record.number_of_failures = 0
        self.job_dao.update(tree_node.job_record)
        self.logger.warn(msg)
        tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

    @thread_safe
    def _callback_reprocess(self, tree_node):
        """ is called from the tree to answer a reprocessing request.
            It is possible that the job record will be transferred to STATE_IN_PROGRESS
            with no related unit_of_work """
        if (tree_node.job_record.is_embryo and tree_node.job_record.number_of_failures == 0) \
                or (tree_node.process_name in self.reprocess
                    and tree_node.timeperiod in self.reprocess[tree_node.process_name]):
            # the node has already been marked for re-processing or does not require one
            pass
        else:
            self._reprocess_single_tree_node(tree_node)

        reprocessing_nodes = self._find_dependant_tree_nodes(tree_node)
        for node in reprocessing_nodes:
            node.request_reprocess()

    @thread_safe
    def _callback_skip(self, tree_node):
        """ is called from the tree to answer a skip request """
        tree_node.job_record.state = job.STATE_SKIPPED
        uow_id = tree_node.job_record.related_unit_of_work
        if uow_id is not None:
            uow = self.uow_dao.get_one(uow_id)
            uow.state = unit_of_work.STATE_CANCELED
            self.uow_dao.update(uow)
            msg = 'Transferred job record %s in timeperiod %s to %s; Transferred unit_of_work to %s' \
                  % (tree_node.job_record.db_id, tree_node.job_record.timeperiod,
                     tree_node.job_record.state, uow.state)
        else:
            msg = 'Transferred job record %s in timeperiod %s to %s;' \
                  % (tree_node.job_record.db_id, tree_node.job_record.timeperiod, tree_node.job_record.state)

        self.job_dao.update(tree_node.job_record)
        self.logger.warn(msg)
        tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

        if tree_node.process_name in self.reprocess \
                and tree_node.timeperiod in self.reprocess[tree_node.process_name]:
            del self.reprocess[tree_node.process_name][tree_node.timeperiod]

    @thread_safe
    def _callback_create_job_record(self, tree_node):
        """ is called from a tree to create a job record in STATE_EMBRYO and bind it to the given tree node """
        try:
            job_record = self.job_dao.get_one(tree_node.process_name, tree_node.timeperiod)
        except LookupError:
            job_record = Job()
            job_record.state = job.STATE_EMBRYO
            job_record.timeperiod = tree_node.timeperiod
            job_record.process_name = tree_node.process_name
            tr_id = self.job_dao.update(job_record)

            self.logger.info('Created job record %s, with timeperiod %s for process %s'
                             % (str(tr_id), tree_node.timeperiod, tree_node.process_name))
        tree_node.job_record = job_record

    # *** Tree-manipulation methods ***
    @thread_safe
    def get_tree(self, process_name):
        """ returns the tree that is managing time-periods for the given process """
        for tree_name, tree in self.trees.items():
            if process_name in tree:
                return tree

    @thread_safe
    def _build_tree_by_level(self, collection_name, since):
        """ method iterates thru all documents in the given job collection
            and builds a tree of the known system state """
        try:
            unsupported_records = dict()
            job_records = self.job_dao.get_all(collection_name, since)
            for job_record in job_records:
                tree = self.get_tree(job_record.process_name)
                if tree is not None:
                    tree.update_node(job_record)
                else:
                    unsupported_records[job_record.process_name] = \
                        unsupported_records.get(job_record.process_name, 0) + 1

            for name, counter in unsupported_records.items():
                self.logger.warn('Skipping %r Job records for %s as no tree is handling it.' % (counter, name))

        except LookupError:
            self.logger.warn('No Job Records in %s.' % str(collection_name))

    @thread_safe
    def load_tree(self):
        """ method iterates thru all objects older than the synergy_start_timeperiod parameter
            in the job collections and loads them into this timetable """
        timeperiod = settings.settings['synergy_start_timeperiod']
        yearly_timeperiod = time_helper.cast_to_time_qualifier(QUALIFIER_YEARLY, timeperiod)
        monthly_timeperiod = time_helper.cast_to_time_qualifier(QUALIFIER_MONTHLY, timeperiod)
        daily_timeperiod = time_helper.cast_to_time_qualifier(QUALIFIER_DAILY, timeperiod)
        hourly_timeperiod = time_helper.cast_to_time_qualifier(QUALIFIER_HOURLY, timeperiod)

        self._build_tree_by_level(COLLECTION_JOB_HOURLY, since=hourly_timeperiod)
        self._build_tree_by_level(COLLECTION_JOB_DAILY, since=daily_timeperiod)
        self._build_tree_by_level(COLLECTION_JOB_MONTHLY, since=monthly_timeperiod)
        self._build_tree_by_level(COLLECTION_JOB_YEARLY, since=yearly_timeperiod)

    @thread_safe
    def build_trees(self):
        """ method iterates thru all trees and ensures that all time-period nodes
            are created up till <utc_now> """
        for tree_name, tree in self.trees.items():
            tree.build_tree()

    @thread_safe
    def validate(self):
        """ validates that no node in any tree is improperly finalized
            and that every node has a job_record """
        for tree_name, tree in self.trees.items():
            tree.validate()

    @thread_safe
    def dependent_on_composite_state(self, job_record):
        """ :return: an instance of <NodesCompositeState> """
        assert isinstance(job_record, Job)
        tree = self.get_tree(job_record.process_name)
        node = tree.get_node(job_record.process_name, job_record.timeperiod)
        return node.dependent_on_composite_state()

    # *** Job manipulation methods ***
    @thread_safe
    def update_job_record(self, job_record, uow, new_state):
        """ method updates the job record with a new unit_of_work and a new state """
        job_record.state = new_state
        job_record.related_unit_of_work = uow.db_id
        job_record.start_id = uow.start_id
        job_record.end_id = uow.end_id
        self.job_dao.update(job_record)

        tree = self.get_tree(job_record.process_name)
        tree.update_node(job_record)

        # NOTE: argument order fixed to match the format string (process_name before timeperiod)
        msg = 'Transferred job %s for %s in timeperiod %s to new state %s' \
              % (job_record.db_id, job_record.process_name, job_record.timeperiod, new_state)
        self.logger.info(msg)
        self.add_log_entry(job_record.process_name, job_record.timeperiod, msg)

    @thread_safe
    def failed_on_processing_job_record(self, process_name, timeperiod):
        """ method increases the node's inner counter of failed processing
            if the _skip_the_node logic returns True - the node is set to STATE_SKIPPED """
        tree = self.get_tree(process_name)
        node = tree.get_node(process_name, timeperiod)

        node.job_record.number_of_failures += 1
        if tree._skip_the_node(node):
            node.request_skip()
        else:
            # job record is automatically updated in request_skip()
            # so if the node was not skipped - the job record has to be updated explicitly
            self.job_dao.update(node.job_record)

    @thread_safe
    def get_next_job_record(self, process_name):
        """ returns the next job record to work on for the given process """
        if process_name in self.reprocess and len(self.reprocess[process_name]) > 0:
            timeperiod = sorted(self.reprocess[process_name].keys())[0]
            node = self.reprocess[process_name][timeperiod]
            del self.reprocess[process_name][timeperiod]
        else:
            tree = self.get_tree(process_name)
            node = tree.get_next_node(process_name)

        if node.job_record is None:
            node.request_embryo_job_record()
        return node.job_record

    @thread_safe
    def is_job_record_finalizable(self, job_record):
        """ :return: True, if the node and all its children are either in STATE_PROCESSED or STATE_SKIPPED """
        assert isinstance(job_record, Job)
        tree = self.get_tree(job_record.process_name)
        node = tree.get_node(job_record.process_name, job_record.timeperiod)
        return node.is_finalizable()

    @thread_safe
    def add_log_entry(self, process_name, timeperiod, msg):
        """ adds a log entry to the tree node. log entries have no persistence """
        tree = self.get_tree(process_name)
        node = tree.get_node(process_name, timeperiod)
        node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])
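# Illustrative usage sketch (an assumption): the flow a scheduler thread typically
# runs against the Timetable for a managed process; 'sample_process' is hypothetical:
#
#   timetable = Timetable(logger)
#   job_record = timetable.get_next_job_record('sample_process')
#   if timetable.is_job_record_finalizable(job_record):
#       ...  # hand the job record over to the governing state machine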
class StateMachineFreerun(object):
    """ State Machine to handle freerun jobs/triggers """

    def __init__(self, logger, name=STATE_MACHINE_FREERUN):
        self.name = name
        self.logger = logger
        self.mq_transmitter = MqTransmitter(self.logger)
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.sfe_dao = FreerunProcessDao(self.logger)

    @with_reconnect
    def _log_message(self, level, freerun_entry, msg):
        """ method performs logging into the log file and the freerun_entry """
        self.logger.log(level, msg)

        assert isinstance(freerun_entry, FreerunProcessEntry)
        event_log = freerun_entry.event_log
        if len(event_log) > MAX_NUMBER_OF_EVENTS:
            del event_log[-1]
        event_log.insert(0, msg)
        self.sfe_dao.update(freerun_entry)

    @with_reconnect
    def _insert_uow(self, freerun_entry):
        """ creates a unit_of_work and inserts it into the DB
            :raise DuplicateKeyError: if a unit_of_work with the given parameters already exists """
        current_timeperiod = time_helper.actual_timeperiod(QUALIFIER_REAL_TIME)
        process_entry = context.process_context[freerun_entry.process_name]

        uow = UnitOfWork()
        uow.process_name = freerun_entry.schedulable_name
        uow.timeperiod = current_timeperiod
        uow.start_id = 0
        uow.end_id = 0
        uow.start_timeperiod = current_timeperiod
        uow.end_timeperiod = current_timeperiod
        uow.created_at = datetime.utcnow()
        uow.submitted_at = datetime.utcnow()
        uow.source = process_entry.source if hasattr(process_entry, 'source') else None
        uow.sink = process_entry.sink if hasattr(process_entry, 'sink') else None
        uow.state = unit_of_work.STATE_REQUESTED
        uow.unit_of_work_type = unit_of_work.TYPE_FREERUN
        uow.number_of_retries = 0
        uow.arguments = freerun_entry.arguments
        uow.db_id = self.uow_dao.insert(uow)

        msg = 'Created: UOW {0} for {1}@{2}.' \
              .format(uow.db_id, freerun_entry.schedulable_name, current_timeperiod)
        self._log_message(INFO, freerun_entry, msg)
        return uow

    def _publish_uow(self, freerun_entry, uow):
        self.mq_transmitter.publish_freerun_uow(freerun_entry, uow)
        msg = 'Published: UOW {0} for {1}.'.format(uow.db_id, freerun_entry.schedulable_name)
        self._log_message(INFO, freerun_entry, msg)

    def insert_and_publish_uow(self, freerun_entry):
        try:
            uow = self._insert_uow(freerun_entry)
        except DuplicateKeyError as e:
            msg = 'Duplication of UOW found for {0}. Error msg: {1}'.format(freerun_entry.schedulable_name, e)
            self._log_message(WARNING, freerun_entry, msg)
            uow = self.uow_dao.recover_from_duplicatekeyerror(e)

        if uow is not None:
            # publish the created/caught up unit_of_work
            self._publish_uow(freerun_entry, uow)
            freerun_entry.related_unit_of_work = uow.db_id
            self.sfe_dao.update(freerun_entry)
        else:
            msg = 'PERSISTENT TIER ERROR! Unable to locate UOW for {0}' \
                  .format(freerun_entry.schedulable_name)
            self._log_message(WARNING, freerun_entry, msg)

    def _process_state_embryo(self, freerun_entry):
        """ method creates a unit_of_work and associates it with the FreerunProcessEntry """
        self.insert_and_publish_uow(freerun_entry)

    def _process_state_in_progress(self, freerun_entry, uow):
        """ method processes unit_of_work records in STATE_REQUESTED or STATE_IN_PROGRESS states """
        self._publish_uow(freerun_entry, uow)

    def _process_terminal_state(self, freerun_entry, uow):
        """ method processes unit_of_work records in STATE_PROCESSED, STATE_NOOP,
            STATE_INVALID, STATE_CANCELED states """
        msg = 'UOW for {0} found in state {1}.'.format(freerun_entry.schedulable_name, uow.state)
        self._log_message(INFO, freerun_entry, msg)
        self.insert_and_publish_uow(freerun_entry)

    def manage_schedulable(self, freerun_entry):
        """ method's main duty is to _avoid_ publishing another unit_of_work
            if the previous one was not yet processed
            In case the Scheduler sees that the unit_of_work is pending it will fire another WorkerMqRequest """
        assert isinstance(freerun_entry, FreerunProcessEntry)
        if freerun_entry.related_unit_of_work is None:
            uow = None
        else:
            uow = self.uow_dao.get_one(freerun_entry.related_unit_of_work)

        try:
            if uow is None:
                self._process_state_embryo(freerun_entry)

            elif uow.is_requested or uow.is_in_progress:
                self._process_state_in_progress(freerun_entry, uow)

            elif uow.is_finished or uow.is_invalid:
                self._process_terminal_state(freerun_entry, uow)

            else:
                msg = 'Unknown state {0} of the UOW {1}'.format(uow.state, uow.db_id)
                self._log_message(ERROR, freerun_entry, msg)

        except LookupError as e:
            msg = 'Lookup issue for schedulable: {0} in timeperiod {1}, because of: {2}' \
                  .format(freerun_entry.db_id, uow.timeperiod, e)
            self._log_message(WARNING, freerun_entry, msg)

    def cancel_uow(self, freerun_entry):
        uow_id = freerun_entry.related_unit_of_work
        if uow_id is None:
            msg = 'cancel_uow: no related UOW for {0}'.format(freerun_entry.schedulable_name)
        else:
            uow = self.uow_dao.get_one(uow_id)
            uow.state = unit_of_work.STATE_CANCELED
            self.uow_dao.update(uow)
            msg = 'cancel_uow: canceled UOW {0} for {1}'.format(uow_id, freerun_entry.schedulable_name)
        self._log_message(INFO, freerun_entry, msg)
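# Illustrative usage sketch (an assumption): a freerun trigger fires on its
# trigger_frequency timer and delegates to the state machine, roughly:
#
#   state_machine = StateMachineFreerun(logger)
#   state_machine.manage_schedulable(freerun_entry)   # publishes or re-publishes the UOW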
class GarbageCollector(object):
    """ GC is triggered directly by the Synergy Scheduler.
        It scans for invalid or stalled units of work and re-triggers them.
        GC is vital for the health of the system.
        A deployment with no GC is considered invalid """

    def __init__(self, scheduler):
        self.logger = get_logger(PROCESS_GC, append_to_console=False, redirect_stdstream=False)
        self.managed_handlers = scheduler.managed_handlers
        self.mq_transmitter = MqTransmitter(self.logger)
        self.timetable = scheduler.timetable

        self.lock = Lock()
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.reprocess_uows = collections.defaultdict(PriorityQueue)
        self.timer = RepeatTimer(settings.settings['gc_run_interval'], self._run)

    @thread_safe
    def scan_uow_candidates(self):
        """ method performs two actions:
            - enlists stale or invalid units of work into the reprocessing queue
            - cancels UOWs that are older than 2 days and have been submitted more than 1 hour ago """
        try:
            since = settings.settings['synergy_start_timeperiod']
            uow_list = self.uow_dao.get_reprocessing_candidates(since)
        except LookupError as e:
            self.logger.info('flow: no UOW candidates found for reprocessing: {0}'.format(e))
            return

        for uow in uow_list:
            try:
                if uow.process_name not in self.managed_handlers:
                    self.logger.debug('process {0} is not known to the Synergy Scheduler. Skipping its UOW.'
                                      .format(uow.process_name))
                    continue

                thread_handler = self.managed_handlers[uow.process_name]
                assert isinstance(thread_handler, ManagedThreadHandler)

                if not thread_handler.process_entry.is_on:
                    self.logger.debug('process {0} is inactive. Skipping its UOW.'.format(uow.process_name))
                    continue

                entry = PriorityEntry(uow)
                if entry in self.reprocess_uows[uow.process_name]:
                    # given UOW is already registered in the reprocessing queue
                    continue

                # ASSUMPTION: UOW is re-created by a state machine during reprocessing
                # thus - any UOW older than 2 days could be marked as STATE_CANCELED
                if datetime.utcnow() - uow.created_at > \
                        timedelta(hours=settings.settings['gc_life_support_hours']):
                    self._cancel_uow(uow)
                    continue

                # if the UOW has been idle for more than 1 hour - resubmit it
                if datetime.utcnow() - uow.submitted_at > \
                        timedelta(hours=settings.settings['gc_resubmit_after_hours']) \
                        or uow.is_invalid:
                    # enlist the UOW into the reprocessing queue
                    self.reprocess_uows[uow.process_name].put(entry)

            except Exception as e:
                self.logger.error('flow exception: {0}'.format(e), exc_info=True)

    def _flush_queue(self, q, ignore_priority=False):
        """
        :param q: PriorityQueue instance holding GarbageCollector entries
        :param ignore_priority: If True - all GarbageCollector entries are resubmitted
            If False - only those entries whose waiting time has expired are resubmitted
        """
        assert isinstance(q, PriorityQueue)

        current_timestamp = compute_release_time(lag_in_minutes=0)
        for _ in range(len(q)):
            entry = q.pop()
            assert isinstance(entry, PriorityEntry)

            if ignore_priority or entry.release_time < current_timestamp:
                self._resubmit_uow(entry.entry)
            else:
                q.put(entry)
                break

    @thread_safe
    def flush(self, ignore_priority=False):
        """ method iterates over the reprocessing queues and re-submits UOWs whose waiting time has expired """
        for process_name, q in self.reprocess_uows.items():
            self._flush_queue(q, ignore_priority)

    @thread_safe
    def validate(self):
        """ method iterates over the reprocessing queue and synchronizes the state of every UOW with the DB
            should it change via the MX to STATE_CANCELED - the UOW is removed from the queue """
        for process_name, q in self.reprocess_uows.items():
            if not q:
                continue

            invalid_entries = list()
            for entry in q.queue:
                assert isinstance(entry, PriorityEntry)
                uow = self.uow_dao.get_one(entry.entry.db_id)
                if uow.is_canceled:
                    invalid_entries.append(entry)

            for entry in invalid_entries:
                q.queue.remove(entry)

        self.logger.info('reprocessing queue validated')

    @thread_safe
    def flush_one(self, process_name, ignore_priority=False):
        """ method iterates over the reprocessing queue for the given process
            and re-submits UOWs whose waiting time has expired """
        q = self.reprocess_uows[process_name]
        self._flush_queue(q, ignore_priority)

    def _resubmit_uow(self, uow):
        # re-read the UOW from the DB, in case it was transferred to STATE_CANCELED by the MX
        uow = self.uow_dao.get_one(uow.db_id)
        if uow.is_canceled:
            self.logger.info('suppressed re-submission of UOW {0} for {1}@{2} in {3};'
                             .format(uow.db_id, uow.process_name, uow.timeperiod, uow.state))
            return

        if uow.is_invalid:
            uow.number_of_retries += 1

        uow.state = unit_of_work.STATE_REQUESTED
        uow.submitted_at = datetime.utcnow()
        self.uow_dao.update(uow)

        self.mq_transmitter.publish_managed_uow(uow)
        self.logger.info('re-submitted UOW {0} for {1}@{2}; attempt {3}'
                         .format(uow.db_id, uow.process_name, uow.timeperiod, uow.number_of_retries))

    def _cancel_uow(self, uow):
        uow.state = unit_of_work.STATE_CANCELED
        self.uow_dao.update(uow)

        self.mq_transmitter.publish_uow_status(uow)
        self.logger.info('canceled UOW {0} for {1}@{2}; attempt {3}; created at {4}'
                         .format(uow.db_id, uow.process_name, uow.timeperiod,
                                 uow.number_of_retries, uow.created_at))

    def _run(self):
        try:
            self.logger.info('run {')

            self.logger.debug('step 1: validate existing queue entries')
            self.validate()

            self.logger.debug('step 2: scan reprocessing candidates')
            self.scan_uow_candidates()

            self.logger.debug('step 3: repost after timeout')
            self.flush()

            self.logger.debug('step 4: timetable housekeeping')
            self.timetable.build_trees()

            self.logger.debug('step 5: timetable validation')
            self.timetable.validate()
        except Exception as e:
            self.logger.error('GC run exception: {0}'.format(e))
        finally:
            self.logger.info('}')

    def start(self):
        self.timer.start()

    def stop(self):
        self.timer.cancel()
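# Illustrative usage sketch (an assumption): the Scheduler owns the GC life cycle;
# the wiring is roughly as follows, with 'sample_process' hypothetical:
#
#   gc = GarbageCollector(scheduler)
#   gc.start()    # RepeatTimer now invokes _run() every gc_run_interval seconds
#   ...
#   gc.flush_one('sample_process', ignore_priority=True)   # force re-submission, e.g. from MX
#   gc.stop()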
class GarbageCollectorWorker(AbstractMqWorker):
    """ GC is triggered by an empty message from RabbitMQ.
        It scans for invalid or stalled units of work and re-triggers them.
        GC is vital for the health of the system.
        A deployment with no running GC is considered invalid """

    def __init__(self, process_name):
        super(GarbageCollectorWorker, self).__init__(process_name)
        self.lock = Lock()
        self.publishers = PublishersPool(self.logger)
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.managed_dao = ManagedProcessDao(self.logger)
        self.managed_entries = dict()

    def __del__(self):
        try:
            self.logger.info('Closing Flopsy Publishers Pool...')
            self.publishers.close()
        except Exception as e:
            self.logger.error('Exception caught while closing Flopsy Publishers Pool: %s' % str(e))
        super(GarbageCollectorWorker, self).__del__()

    @thread_safe
    def _mq_callback(self, message):
        """ method looks for stale or invalid units of work and re-runs them if needed """
        try:
            managed_entries = self.managed_dao.get_all()
            self._update_managed_entries(managed_entries)

            since = settings.settings['synergy_start_timeperiod']
            uow_list = self.uow_dao.get_reprocessing_candidates(since)
            for uow in uow_list:
                if uow.process_name not in self.managed_entries:
                    self.logger.debug('Process %r is not known to the Synergy Scheduler. '
                                      'Skipping its unit_of_work.' % uow.process_name)
                    continue

                process_entry = self.managed_entries[uow.process_name]
                assert isinstance(process_entry, ManagedProcessEntry)
                if not process_entry.is_on:
                    self.logger.debug('Process %r is inactive at the Synergy Scheduler. '
                                      'Skipping its unit_of_work.' % uow.process_name)
                    continue

                self._process_single_document(uow)

        except LookupError as e:
            self.logger.info('Normal behaviour. %r' % e)
        except Exception as e:
            self.logger.error('_mq_callback: %s' % str(e), exc_info=True)
        finally:
            self.consumer.acknowledge(message.delivery_tag)

    def _update_managed_entries(self, managed_entries):
        self.managed_entries = {me.process_name: me for me in managed_entries}

    def _process_single_document(self, uow):
        """ actually inspects the UOW retrieved from the database """
        repost = False
        if uow.is_invalid:
            repost = True

        elif uow.is_in_progress or uow.is_requested:
            last_activity = uow.started_at
            if last_activity is None:
                last_activity = uow.created_at

            if datetime.utcnow() - last_activity > timedelta(hours=REPOST_AFTER_HOURS):
                repost = True

        if repost:
            mq_request = SynergyMqTransmission(process_name=uow.process_name,
                                               unit_of_work_id=uow.db_id)

            if datetime.utcnow() - uow.created_at < timedelta(hours=LIFE_SUPPORT_HOURS):
                uow.state = unit_of_work.STATE_REQUESTED
                uow.number_of_retries += 1
                self.uow_dao.update(uow)

                publisher = self.publishers.get(uow.process_name)
                publisher.publish(mq_request.document)
                publisher.release()

                self.logger.info('UOW marked for re-processing: process %s; timeperiod %s; id %s; attempt %d'
                                 % (uow.process_name, uow.timeperiod, uow.db_id, uow.number_of_retries))
                self.performance_ticker.tracker.increment_success()
            else:
                uow.state = unit_of_work.STATE_CANCELED
                self.uow_dao.update(uow)

                publisher = self.publishers.get(QUEUE_UOW_REPORT)
                publisher.publish(mq_request.document)
                publisher.release()

                self.logger.info('UOW transferred to STATE_CANCELED: process %s; timeperiod %s; id %s; attempt %d'
                                 % (uow.process_name, uow.timeperiod, uow.db_id, uow.number_of_retries))
class AbstractUowAwareWorker(AbstractMqWorker):
    """ Abstract class is inherited by all workers/aggregators
        that are aware of unit_of_work and capable of processing it """

    def __init__(self, process_name):
        super(AbstractUowAwareWorker, self).__init__(process_name)
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.publishers = PublishersPool(self.logger)

    def __del__(self):
        self.publishers.close()
        super(AbstractUowAwareWorker, self).__del__()

    # **************** Abstract Methods ************************
    def _init_performance_ticker(self, logger):
        self.performance_ticker = UowAwareTracker(logger)
        self.performance_ticker.start()

    def _process_uow(self, uow):
        """
        :param uow: unit_of_work to process
        :return: a tuple (number of processed items/documents/etc, desired unit_of_work state) or None
            if None is returned, the return tuple is assumed to be (0, unit_of_work.STATE_PROCESSED)
        :raise an Exception if the UOW shall be marked as STATE_INVALID
        """
        raise NotImplementedError('_process_uow must be overridden in the child class %s'
                                  % self.__class__.__name__)

    def _clean_up(self):
        """ method is called from the *finally* clause and is supposed to clean up after the uow processing """
        pass

    def _mq_callback(self, message):
        try:
            mq_request = SynergyMqTransmission.from_json(message.body)
            uow = self.uow_dao.get_one(mq_request.unit_of_work_id)
            if not uow.is_requested:
                # accept only UOW in STATE_REQUESTED
                self.logger.warn('Skipping unit_of_work: id %s; state %s;' % (str(message.body), uow.state),
                                 exc_info=False)
                self.consumer.acknowledge(message.delivery_tag)
                return
        except Exception:
            self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
            self.consumer.acknowledge(message.delivery_tag)
            return

        try:
            uow.state = unit_of_work.STATE_IN_PROGRESS
            uow.started_at = datetime.utcnow()
            self.uow_dao.update(uow)
            self.performance_ticker.start_uow(uow)

            result = self._process_uow(uow)
            if result is None or isinstance(result, str):
                number_of_aggregated_objects, target_state = 0, unit_of_work.STATE_PROCESSED
            else:
                number_of_aggregated_objects, target_state = result

            uow.number_of_aggregated_documents = number_of_aggregated_objects
            uow.number_of_processed_documents = self.performance_ticker.per_job
            uow.finished_at = datetime.utcnow()
            uow.state = target_state
            self.uow_dao.update(uow)
            self.performance_ticker.finish_uow()

        except Exception as e:
            fresh_uow = self.uow_dao.get_one(mq_request.unit_of_work_id)
            self.performance_ticker.cancel_uow()
            if fresh_uow.is_canceled:
                self.logger.warn('unit_of_work: id %s was likely marked by MX as SKIPPED. '
                                 'No unit_of_work update is performed.' % str(message.body),
                                 exc_info=False)
            else:
                self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s: %r'
                                  % (message.body, uow.timeperiod, e), exc_info=True)
                uow.state = unit_of_work.STATE_INVALID
                self.uow_dao.update(uow)

        finally:
            self.consumer.acknowledge(message.delivery_tag)
            self.consumer.close()
            self._clean_up()

        try:
            publisher = self.publishers.get(QUEUE_UOW_REPORT)
            publisher.publish(mq_request.document)
            publisher.release()
            self.logger.info('Published unit_of_work status report into %s queue' % QUEUE_UOW_REPORT)
        except Exception:
            self.logger.error('Error on unit_of_work status report publishing', exc_info=True)
class FreerunActionHandler(AbstractActionHandler):
    def __init__(self, request, **values):
        super(FreerunActionHandler, self).__init__(request, **values)
        self.process_name = self.request_arguments.get('process_name')
        self.entry_name = self.request_arguments.get('entry_name')
        self.freerun_process_dao = FreerunProcessDao(self.logger)
        self.uow_dao = UnitOfWorkDao(self.logger)
        self.is_request_valid = bool(self.process_name) and bool(self.entry_name)

        if self.is_request_valid:
            self.process_name = self.process_name.strip()
            self.entry_name = self.entry_name.strip()
            self.is_requested_state_on = 'is_on' in self.request_arguments and self.request_arguments['is_on']

    @AbstractActionHandler.thread_handler.getter
    def thread_handler(self):
        handler_key = (self.process_name, self.entry_name)
        return self.scheduler.freerun_handlers[handler_key]

    @AbstractActionHandler.process_entry.getter
    def process_entry(self):
        return self.thread_handler.process_entry

    @valid_action_request
    def action_cancel_uow(self):
        uow_id = self.process_entry.related_unit_of_work
        if uow_id is None:
            resp = {'response': 'no related unit_of_work'}
        else:
            uow = self.uow_dao.get_one(uow_id)
            uow.state = unit_of_work.STATE_CANCELED
            self.uow_dao.update(uow)
            resp = {'response': 'updated unit_of_work %r' % uow_id}
        return resp

    @valid_action_request
    def action_get_uow(self):
        uow_id = self.process_entry.related_unit_of_work
        if uow_id is None:
            resp = {'response': 'no related unit_of_work'}
        else:
            resp = self.uow_dao.get_one(uow_id).document
            for key in resp:
                resp[key] = str(resp[key])
        return resp

    @valid_action_request
    def action_get_log(self):
        return {'log': self.process_entry.log}

    @valid_action_request
    def action_update_entry(self):
        if 'insert_button' in self.request_arguments:
            process_entry = FreerunProcessEntry()
            process_entry.process_name = self.process_name
            process_entry.entry_name = self.entry_name

            if self.request_arguments['arguments']:
                arguments = self.request_arguments['arguments'].decode('unicode-escape')
                process_entry.arguments = json.loads(arguments)
            else:
                process_entry.arguments = {}

            process_entry.description = self.request_arguments['description']
            process_entry.is_on = self.is_requested_state_on
            process_entry.trigger_frequency = self.request_arguments['trigger_frequency']
            self.freerun_process_dao.update(process_entry)

            self.scheduler._register_process_entry(process_entry, self.scheduler.fire_freerun_worker)

        elif 'update_button' in self.request_arguments:
            is_interval_changed = \
                self.process_entry.trigger_frequency != self.request_arguments['trigger_frequency']

            if self.request_arguments['arguments']:
                arguments = self.request_arguments['arguments'].decode('unicode-escape')
                self.process_entry.arguments = json.loads(arguments)
            else:
                self.process_entry.arguments = {}

            self.process_entry.description = self.request_arguments['description']
            self.process_entry.is_on = self.is_requested_state_on
            self.process_entry.trigger_frequency = self.request_arguments['trigger_frequency']
            self.freerun_process_dao.update(self.process_entry)

            if is_interval_changed:
                self.action_change_interval()

            if self.process_entry.is_on != self.is_requested_state_on:
                if self.is_requested_state_on:
                    self.action_activate_trigger()
                else:
                    self.action_deactivate_trigger()

        elif 'delete_button' in self.request_arguments:
            handler_key = (self.process_name, self.entry_name)
            self.thread_handler.deactivate()
            self.freerun_process_dao.remove(handler_key)
            del self.scheduler.freerun_handlers[handler_key]

        elif 'cancel_button' in self.request_arguments:
            pass

        else:
            self.logger.error('Unknown action requested by schedulable_form.html')

        return self.reply_ok()