def _process_state_in_progress(self, process_name, time_record, start_time):
    """Process a timetable record that is in STATE_IN_PROGRESS.

    The unit of work referenced by ``time_record`` is loaded, and one of
    three actions is taken:

    * the period is the current one (``start_time == actual_time``) or the
      record can not be finalized yet: the related unit of work is updated
      in place when its state is INVALID or REQUESTED; otherwise a new one
      is computed and the record is transferred to "in progress";
    * the period lies in the past and the record can be finalized: a unit
      of work for the final run is computed;
    * anything else is reported as a time-record with a timestamp from
      the future.

    :param process_name: name of the process the time-record belongs to
    :param time_record: timetable record being processed
    :param start_time: start of the period covered by ``time_record``;
        the end of the period is derived via ``time_helper.increment_time``
    """
    end_time = time_helper.increment_time(process_name, start_time)
    actual_time = time_helper.actual_time(process_name)
    can_finalize = self.timetable.can_finalize_timetable_record(process_name, time_record)
    related_uow_id = time_record.get_related_unit_of_work()
    uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(related_uow_id))

    # NOTE: the explicit ``== False`` / ``== True`` comparisons are kept on
    # purpose - they differ from plain truthiness tests for non-bool values.
    if start_time == actual_time or can_finalize == False:  # noqa: E712
        not_yet_processed = (UnitOfWorkEntry.STATE_INVALID, UnitOfWorkEntry.STATE_REQUESTED)
        if uow_obj.get_state() in not_yet_processed:
            # current uow has not been processed yet - widen its scope
            self.update_scope_of_processing(process_name, uow_obj, start_time, end_time, time_record)
        else:
            # remaining states (STATE_IN_PROGRESS, STATE_PROCESSED, STATE_CANCELED):
            # a new uow is created to cover new inserts
            self._compute_and_transfer_to_progress(process_name, start_time, end_time, time_record)
        return

    if start_time < actual_time and can_finalize == True:  # noqa: E712
        # period is over and may be finalized: create new uow for FINAL RUN
        self._compute_and_transfer_to_final_run(process_name, start_time, end_time, time_record)
        return

    msg = 'Time-record %s has timestamp from future %s vs current time %s' \
          % (time_record.get_document()['_id'], start_time, actual_time)
    self._log_message(ERROR, process_name, time_record, msg)
def _process_state_in_progress(self, process_name, time_record, start_time):
    """Process a timetable record that is in STATE_IN_PROGRESS (Hadoop flow).

    The unit of work referenced by ``time_record`` is loaded and the target
    timetable state is selected:

    * STATE_IN_PROGRESS when the period is the current one
      (``start_time == actual_time``) or the record can not be finalized yet;
    * STATE_FINAL_RUN when the period lies in the past and the record can
      be finalized;
    * otherwise the record is reported as having a timestamp from the
      future and nothing else happens.

    A new unit of work (iteration ``end_id + 1``) is inserted, and the
    timetable record updated, only when the current unit of work is
    PROCESSED or CANCELED. While it is REQUESTED, IN_PROGRESS or INVALID the
    Hadoop processing is considered pending/running and no Scheduler
    records are touched.

    A ``DuplicateKeyError`` raised along the way is passed to
    ``recover_from_duplicatekeyerror``; if that yields a unit of work, the
    timetable record is re-linked to it with its state unchanged, otherwise
    an error demanding manual intervention is logged.

    :param process_name: name of the process the time-record belongs to
    :param time_record: timetable record being processed
    :param start_time: start of the period covered by ``time_record``;
        the end of the period is derived via ``time_helper.increment_time``
    """
    end_time = time_helper.increment_time(process_name, start_time)
    actual_time = time_helper.actual_time(process_name)
    can_finalize = self.timetable.can_finalize_timetable_record(process_name, time_record)
    related_uow_id = time_record.get_related_unit_of_work()
    uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(related_uow_id))
    iteration = int(uow_obj.get_end_id())

    try:
        # NOTE: the explicit ``== False`` / ``== True`` comparisons are kept on
        # purpose - they differ from plain truthiness tests for non-bool values.
        if start_time == actual_time or can_finalize == False:  # noqa: E712
            # new uow would cover new inserts of the still-open period
            next_record_state = TimeTableEntry.STATE_IN_PROGRESS
        elif start_time < actual_time and can_finalize == True:  # noqa: E712
            # new uow would be the FINAL RUN for the period
            next_record_state = TimeTableEntry.STATE_FINAL_RUN
        else:
            msg = 'Time-record %s has timestamp from future %s vs current time %s' \
                  % (time_record.get_document()['_id'], start_time, actual_time)
            self._log_message(ERROR, process_name, time_record, msg)
            return

        completed_states = (UnitOfWorkEntry.STATE_PROCESSED, UnitOfWorkEntry.STATE_CANCELED)
        if uow_obj.get_state() not in completed_states:
            # STATE_REQUESTED / STATE_IN_PROGRESS / STATE_INVALID: Hadoop processing
            # has not started yet or takes more than 1 tick of Scheduler.
            # Let the Hadoop processing complete - do no updates to Scheduler records
            return

        uow_obj = self.insert_uow(process_name, start_time, end_time, iteration + 1, time_record)
        self.timetable.update_timetable_record(process_name, time_record, uow_obj, next_record_state)
    except DuplicateKeyError as e:
        recovered_uow = self.recover_from_duplicatekeyerror(e)
        if recovered_uow is None:
            msg = 'MANUAL INTERVENTION REQUIRED! Unable to identify unit_of_work for %s in %s' \
                  % (process_name, time_record.get_timestamp())
            self._log_message(ERROR, process_name, time_record, msg)
        else:
            # re-link the record to the recovered uow, keeping its current state
            self.timetable.update_timetable_record(process_name, time_record, recovered_uow,
                                                   time_record.get_state())