def _process_state_final_run(self, process_name, time_record):
        """method takes care of processing timetable records in STATE_FINAL_RUN state"""
        uow_id = time_record.get_related_unit_of_work()
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))

        if uow_obj.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
            self.timetable.update_timetable_record(process_name,
                                                   time_record,
                                                   uow_obj,
                                                   TimeTableEntry.STATE_PROCESSED)
            timetable_tree = self.timetable.get_tree(process_name)
            timetable_tree.build_tree()
            msg = 'Transferred time-record %s in timeperiod %s to STATE_PROCESSED for %s'\
                    % (time_record.get_document()['_id'], time_record.get_timestamp(), process_name)
        elif uow_obj.get_state() == UnitOfWorkEntry.STATE_CANCELED:
            self.timetable.update_timetable_record(process_name,
                                                   time_record,
                                                   uow_obj,
                                                   TimeTableEntry.STATE_SKIPPED)
            msg = 'Transferred time-record %s in timeperiod %s to STATE_SKIPPED for %s'\
                    % (time_record.get_document()['_id'], time_record.get_timestamp(), process_name)
        else:
            msg = 'Suppressed creating uow for %s in timeperiod %s; time_record is in %s; uow is in %s'\
                % (process_name, time_record.get_timestamp(), time_record.get_state(), uow_obj.get_state())
        self._log_message(INFO, process_name, time_record, msg)
# Example #2
# 0
    def _callback_reprocess(self, process_name, timestamp, tree_node):
        """Tree callback that answers a reprocessing request.

        It is possible that the timetable record will be transferred to
        STATE_IN_PROGRESS with no related unit_of_work (when none exists)."""
        record = tree_node.time_record
        uow_id = record.get_related_unit_of_work()
        if uow_id is None:
            # no uow to invalidate - restart the record from scratch
            record.set_state(TimeTableEntry.STATE_EMBRYO)
            msg = 'Transferred time-record %s in timeperiod %s to %s;'\
                    % (record.get_document()['_id'], record.get_timestamp(),
                       record.get_state())
        else:
            record.set_state(TimeTableEntry.STATE_IN_PROGRESS)
            # invalidate the existing uow so it gets picked up and re-run
            uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
            uow_obj.set_state(uow_obj.STATE_INVALID)
            uow_obj.set_number_of_retries(0)
            uow_obj.set_created_at(datetime.utcnow())
            unit_of_work_helper.update(self.logger, uow_obj)
            msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s'\
                    % (record.get_document()['_id'], record.get_timestamp(),
                       record.get_state(), uow_obj.get_state())

        record.set_number_of_failures(0)
        self._save_time_record(process_name, record)
        self.logger.warning(msg)
        tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

        # remember the node so the scheduler keeps tracking this timeperiod
        self.reprocess.setdefault(process_name, dict())[timestamp] = tree_node
    def _process_state_in_progress(self, process_name, time_record, start_time):
        """Handle a timetable record that is in STATE_IN_PROGRESS.

        While the timeperiod is still current (or cannot be finalized yet),
        either extends the scope of an unprocessed uow or creates a new uow
        to cover freshly inserted documents. Once the timeperiod is in the
        past and finalizable, triggers the FINAL RUN transfer. A timestamp
        from the future is logged as an error.
        """
        end_time = time_helper.increment_time(process_name, start_time)
        actual_time = time_helper.actual_time(process_name)
        can_finalize_timerecord = self.timetable.can_finalize_timetable_record(process_name, time_record)
        uow_id = time_record.get_related_unit_of_work()
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))

        if start_time == actual_time or not can_finalize_timerecord:
            if uow_obj.get_state() in (UnitOfWorkEntry.STATE_INVALID,
                                       UnitOfWorkEntry.STATE_REQUESTED):
                # current uow has not been processed yet. update it
                self.update_scope_of_processing(process_name, uow_obj, start_time, end_time, time_record)
            else:
                # cls.STATE_IN_PROGRESS, cls.STATE_PROCESSED, cls.STATE_CANCELED
                # create new uow to cover new inserts
                self._compute_and_transfer_to_progress(process_name, start_time, end_time, time_record)

        elif start_time < actual_time and can_finalize_timerecord:
            # create new uow for FINAL RUN
            self._compute_and_transfer_to_final_run(process_name, start_time, end_time, time_record)

        else:
            # start_time > actual_time: the record belongs to a future timeperiod
            msg = 'Time-record %s has timestamp from future %s vs current time %s'\
                    % (time_record.get_document()['_id'], start_time, actual_time)
            self._log_message(ERROR, process_name, time_record, msg)
    def _mq_callback(self, message):
        """ try/except wrapper
        in case exception breaks the abstract method, this method:
        - catches the exception
        - logs the exception
        - marks unit of work as INVALID"""
        unit_of_work = None
        try:
            # @param object_id: ObjectId of the unit_of_work from mq
            object_id = ObjectId(message.body)
            unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
            if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
                # garbage collector might have reposted this UOW
                self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                    % (str(message.body), unit_of_work.get_state()), exc_info=False)
                self.consumer.acknowledge(message.delivery_tag)
                return
        except Exception:
            # message body could not be resolved to a unit_of_work: ack and drop it
            self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
            self.consumer.acknowledge(message.delivery_tag)
            return

        try:
            start_timestamp = unit_of_work.get_start_timestamp()
            end_timestamp = unit_of_work.get_end_timestamp()

            # mark the uow as taken before launching the external process
            unit_of_work.set_state(UnitOfWorkEntry.STATE_IN_PROGRESS)
            unit_of_work.set_started_at(datetime.utcnow())
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.start_uow(unit_of_work)

            # launch the external (Hadoop) job and poll it until it exits
            self._start_process(start_timestamp, end_timestamp)
            code = None
            alive = True
            while alive:
                alive, code = self._poll_process()

            # exit code 0 means the Map/Reduce job completed successfully
            if code == 0:
                unit_of_work.set_number_of_processed_documents(self.performance_ticker.posts_per_job)
                unit_of_work.set_finished_at(datetime.utcnow())
                unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
                self.performance_ticker.finish_uow()
            else:
                # non-zero exit: leave the uow INVALID so it can be retried
                unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
                self.performance_ticker.cancel_uow()

            self.logger.info('Hadoop Map/Reduce return code is %r' % code)
            unit_of_work_helper.update(self.logger, unit_of_work)
        except Exception as e:
            # any failure marks the uow INVALID so the garbage collector can repost it
            unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.cancel_uow()
            self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'\
                              % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
        finally:
            # NOTE(review): the consumer is closed after every message -
            # presumably each callback owns a dedicated consumer; confirm
            self.consumer.acknowledge(message.delivery_tag)
            self.consumer.close()
    def _process_state_in_progress(self, process_name, time_record, start_time):
        """Handle a timetable record in STATE_IN_PROGRESS (Hadoop flavour).

        A Hadoop job may span several Scheduler ticks, so a uow that is
        still REQUESTED/IN_PROGRESS/INVALID is left alone. A finished uow
        (PROCESSED or CANCELED) triggers creation of a follow-up uow with
        an incremented iteration counter, transferring the record either
        to STATE_IN_PROGRESS or to STATE_FINAL_RUN. DuplicateKeyError on
        insert is recovered by reusing the concurrently created uow.
        """
        end_time = time_helper.increment_time(process_name, start_time)
        actual_time = time_helper.actual_time(process_name)
        can_finalize_timerecord = self.timetable.can_finalize_timetable_record(process_name, time_record)
        uow_id = time_record.get_related_unit_of_work()
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
        # end_id carries the iteration counter for Hadoop-style processing
        iteration = int(uow_obj.get_end_id())

        # uow states meaning "Hadoop is still working - do not touch"
        pending_states = (UnitOfWorkEntry.STATE_REQUESTED,
                          UnitOfWorkEntry.STATE_IN_PROGRESS,
                          UnitOfWorkEntry.STATE_INVALID)
        # uow states meaning "Hadoop processing is over for this uow"
        finished_states = (UnitOfWorkEntry.STATE_PROCESSED,
                           UnitOfWorkEntry.STATE_CANCELED)

        try:
            if start_time == actual_time or not can_finalize_timerecord:
                if uow_obj.get_state() in pending_states:
                    # Hadoop processing takes more than 1 tick of Scheduler
                    # Let the Hadoop processing complete - do no updates to Scheduler records
                    pass
                elif uow_obj.get_state() in finished_states:
                    # create new uow to cover new inserts
                    uow_obj = self.insert_uow(process_name, start_time, end_time, iteration + 1, time_record)
                    self.timetable.update_timetable_record(process_name,
                                                           time_record,
                                                           uow_obj,
                                                           TimeTableEntry.STATE_IN_PROGRESS)

            elif start_time < actual_time and can_finalize_timerecord:
                if uow_obj.get_state() in pending_states:
                    # Hadoop processing has not started yet
                    # Let the Hadoop processing complete - do no updates to Scheduler records
                    pass
                elif uow_obj.get_state() in finished_states:
                    # create new uow for FINAL RUN
                    uow_obj = self.insert_uow(process_name, start_time, end_time, iteration + 1, time_record)
                    self.timetable.update_timetable_record(process_name,
                                                           time_record,
                                                           uow_obj,
                                                           TimeTableEntry.STATE_FINAL_RUN)
            else:
                # start_time > actual_time: the record belongs to a future timeperiod
                msg = 'Time-record %s has timestamp from future %s vs current time %s'\
                        % (time_record.get_document()['_id'], start_time, actual_time)
                self._log_message(ERROR, process_name, time_record, msg)

        except DuplicateKeyError as e:
            # another worker inserted the same uow concurrently - reuse theirs
            uow_obj = self.recover_from_duplicatekeyerror(e)
            if uow_obj is not None:
                self.timetable.update_timetable_record(process_name,
                                                       time_record,
                                                       uow_obj,
                                                       time_record.get_state())
            else:
                msg = 'MANUAL INTERVENTION REQUIRED! Unable to identify unit_of_work for %s in %s'\
                      % (process_name, time_record.get_timestamp())
                self._log_message(ERROR, process_name, time_record, msg)
# Example #6
# 0
    def action_get_uow(self):
        """Return the unit_of_work document for (self.process_name, self.timestamp).

        Returns an empty dict when no tree exists for the process, a short
        diagnostic dict when the node has no related unit_of_work, and
        otherwise the uow document with every value stringified so the
        response is serializable.
        """
        resp = dict()
        timetable = self.mbean.timetable
        tree = timetable.get_tree(self.process_name)

        if tree is not None:
            self.timestamp = time_helper.cast_to_time_qualifier(self.process_name, self.timestamp)
            node = tree.get_node_by_process(self.process_name, self.timestamp)

            uow_id = node.time_record.get_related_unit_of_work()
            if uow_id is None:
                resp = {'response' : 'no related unit_of_work'}
            else:
                # wrap in ObjectId for consistency with every other
                # retrieve_by_id call site in this codebase
                resp = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id)).get_document()
                for key in resp:
                    resp[key] = str(resp[key])

        return resp
# Example #7
# 0
    def _callback_skip(self, process_name, timestamp, tree_node):
        """Tree callback that answers a skip request.

        Marks the timetable record as SKIPPED and, when a related
        unit_of_work exists, cancels that uow as well."""
        record = tree_node.time_record
        record.set_state(TimeTableEntry.STATE_SKIPPED)
        uow_id = record.get_related_unit_of_work()
        if uow_id is None:
            msg = 'Transferred time-record %s in timeperiod %s to %s;'\
                    % (record.get_document()['_id'], record.get_timestamp(),
                       record.get_state())
        else:
            # cancel the related uow so it is not processed any further
            uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
            uow_obj.set_state(uow_obj.STATE_CANCELED)
            unit_of_work_helper.update(self.logger, uow_obj)
            msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s'\
                    % (record.get_document()['_id'], record.get_timestamp(),
                       record.get_state(), uow_obj.get_state())

        self._save_time_record(process_name, record)
        self.logger.warning(msg)
        tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

        # a skipped timeperiod no longer needs reprocessing - drop it if queued
        if process_name in self.reprocess and timestamp in self.reprocess[process_name]:
            del self.reprocess[process_name][timestamp]
    def _mq_callback(self, message):
        """ try/except wrapper
        in case exception breaks the abstract method, this method:
        - catches the exception
        - logs the exception
        - marks unit of work as INVALID"""
        unit_of_work = None
        try:
            # @param object_id: ObjectId of the unit_of_work from mq
            object_id = ObjectId(message.body)
            unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
            if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
                # garbage collector might have reposted this UOW
                self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                    % (str(message.body), unit_of_work.get_state()), exc_info=False)
                self.consumer.acknowledge(message.delivery_tag)
                return
        except Exception:
            # message body could not be resolved to a unit_of_work: ack and drop it
            self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
            self.consumer.acknowledge(message.delivery_tag)
            return

        try:
            # [start_id_obj : end_id_obj] bounds the _id range this uow covers
            start_id_obj = ObjectId(unit_of_work.get_start_id())
            end_id_obj = ObjectId(unit_of_work.get_end_id())
            start_timestamp = unit_of_work.get_start_timestamp()
            end_timestamp = unit_of_work.get_end_timestamp()

            # mark the uow as taken before starting the aggregation loop
            unit_of_work.set_state(UnitOfWorkEntry.STATE_IN_PROGRESS)
            unit_of_work.set_started_at(datetime.utcnow())
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.start_uow(unit_of_work)

            # page through the source collection, bulk_threshold documents per pass
            bulk_threshold = settings['bulk_threshold']
            iteration = 0
            while True:
                source_collection = self._get_source_collection()
                # first pass includes start_id_obj; later passes resume strictly after
                # the last processed _id to avoid re-reading it
                if iteration == 0:
                    queue = { '_id' : { '$gte' : start_id_obj, '$lte' : end_id_obj } }
                else:
                    queue = { '_id' : { '$gt' : start_id_obj, '$lte' : end_id_obj } }

                if start_timestamp is not None and end_timestamp is not None:
                    # remove all accident objects that may be in [start_id_obj : end_id_obj] range
                    queue[AbstractModel.TIMESTAMP] = { '$gte' : start_timestamp, '$lt' : end_timestamp }

                cursor = source_collection.find(queue).sort('_id', ASCENDING).limit(bulk_threshold)
                count = cursor.count(with_limit_and_skip=True)
                if count == 0 and iteration == 0:
                    # empty on the very first pass: nothing to do for this uow
                    msg = 'No entries in %s at range [%s : %s]'\
                            % (str(source_collection.name), unit_of_work.get_start_id(), unit_of_work.get_end_id())
                    self.logger.warning(msg)
                    break
                else:
                    # process this page; new_start_id becomes the resume point
                    shall_continue, new_start_id = self._process_not_empty_cursor(cursor)
                    if shall_continue:
                        start_id_obj = new_start_id
                        iteration += 1
                    else:
                        break

            msg = 'Cursor exploited after %s iterations' % str(iteration)
            self.logger.info(msg)

            # flush aggregation results and record uow completion statistics
            self.perform_post_processing(unit_of_work.get_timestamp())
            number_of_aggregated_objects = self._flush_aggregated_objects()
            unit_of_work.set_number_of_aggregated_documents(number_of_aggregated_objects)
            unit_of_work.set_number_of_processed_documents(self.performance_ticker.posts_per_job)
            unit_of_work.set_finished_at(datetime.utcnow())
            unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.finish_uow()
        except Exception as e:
            # any failure marks the uow INVALID so it can be reposted and retried
            unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.cancel_uow()

            # drop partially aggregated data and reclaim memory before the next uow
            del self.aggregated_objects
            self.aggregated_objects = dict()
            gc.collect()

            self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'\
                              % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
        finally:
            # NOTE(review): the consumer is closed after every message -
            # presumably each callback owns a dedicated consumer; confirm
            self.consumer.acknowledge(message.delivery_tag)
            self.consumer.close()