def _process_state_in_progress(self, process_name, time_record, start_time):
    """Process a timetable record that is in STATE_IN_PROGRESS.

    Possible outcomes:
      * ``start_time`` equals the actual time of the process, or the timetable
        reports that the record can not be finalized: the related unit_of_work
        gets its scope updated when it is still STATE_INVALID/STATE_REQUESTED,
        otherwise a new unit_of_work is computed and the record stays in progress;
      * ``start_time`` is earlier than the actual time and the record can be
        finalized: a unit_of_work for the FINAL RUN is computed;
      * any other combination is logged as an error (timestamp from the future).

    :param process_name: name of the process, forwarded to time_helper and timetable
    :param time_record: timetable record being processed
    :param start_time: start of the period; the end is derived via increment_time
    """
    end_time = time_helper.increment_time(process_name, start_time)
    actual_time = time_helper.actual_time(process_name)
    can_finalize_timerecord = self.timetable.can_finalize_timetable_record(process_name, time_record)
    related_uow = unit_of_work_helper.retrieve_by_id(
        self.logger, ObjectId(time_record.get_related_unit_of_work()))

    # NOTE: explicit comparisons with False/True are kept on purpose, so that
    # a non-boolean answer from the timetable is treated exactly as before
    if start_time == actual_time or can_finalize_timerecord == False:
        not_yet_processed = (UnitOfWorkEntry.STATE_INVALID, UnitOfWorkEntry.STATE_REQUESTED)
        if related_uow.get_state() in not_yet_processed:
            # current uow has not been processed yet. update it
            self.update_scope_of_processing(process_name, related_uow, start_time, end_time, time_record)
        else:
            # STATE_IN_PROGRESS, STATE_PROCESSED, STATE_CANCELED
            # create new uow to cover new inserts
            self._compute_and_transfer_to_progress(process_name, start_time, end_time, time_record)
        return

    if start_time < actual_time and can_finalize_timerecord == True:
        # create new uow for FINAL RUN
        self._compute_and_transfer_to_final_run(process_name, start_time, end_time, time_record)
        return

    msg = ('Time-record %s has timestamp from future %s vs current time %s'
           % (time_record.get_document()['_id'], start_time, actual_time))
    self._log_message(ERROR, process_name, time_record, msg)
def _process_state_in_progress(self, process_name, time_record, start_time):
    """Process a timetable record that is in STATE_IN_PROGRESS.

    The timetable state the record should move to is chosen first:
      * STATE_IN_PROGRESS when ``start_time`` equals the actual time of the
        process, or the record can not be finalized yet;
      * STATE_FINAL_RUN when ``start_time`` is earlier than the actual time and
        the record can be finalized;
      * otherwise an error is logged (timestamp from the future) and nothing
        else happens.

    A new unit_of_work (iteration = previous end_id + 1) is inserted only when
    the related one is STATE_PROCESSED or STATE_CANCELED. On DuplicateKeyError
    the method tries to recover the existing unit_of_work and re-attach it to
    the timetable record, keeping the record's current state.

    :param process_name: name of the process, forwarded to time_helper and timetable
    :param time_record: timetable record being processed
    :param start_time: start of the period; the end is derived via increment_time
    """
    end_time = time_helper.increment_time(process_name, start_time)
    actual_time = time_helper.actual_time(process_name)
    can_finalize_timerecord = self.timetable.can_finalize_timetable_record(process_name, time_record)
    uow_obj = unit_of_work_helper.retrieve_by_id(
        self.logger, ObjectId(time_record.get_related_unit_of_work()))
    iteration = int(uow_obj.get_end_id())

    try:
        # NOTE: explicit comparisons with False/True are kept on purpose, so that
        # a non-boolean answer from the timetable is treated exactly as before
        if start_time == actual_time or can_finalize_timerecord == False:
            # new uow (if any) will cover new inserts
            next_record_state = TimeTableEntry.STATE_IN_PROGRESS
        elif start_time < actual_time and can_finalize_timerecord == True:
            # new uow (if any) will be the FINAL RUN
            next_record_state = TimeTableEntry.STATE_FINAL_RUN
        else:
            msg = ('Time-record %s has timestamp from future %s vs current time %s'
                   % (time_record.get_document()['_id'], start_time, actual_time))
            self._log_message(ERROR, process_name, time_record, msg)
            return

        # STATE_REQUESTED, STATE_IN_PROGRESS, STATE_INVALID:
        #   Hadoop processing has not started yet or takes more than 1 tick of Scheduler
        #   Let the Hadoop processing complete - do no updates to Scheduler records
        # STATE_PROCESSED, STATE_CANCELED:
        #   create new uow and point the timetable record to it
        if uow_obj.get_state() in (UnitOfWorkEntry.STATE_PROCESSED, UnitOfWorkEntry.STATE_CANCELED):
            uow_obj = self.insert_uow(process_name, start_time, end_time, iteration + 1, time_record)
            self.timetable.update_timetable_record(process_name, time_record, uow_obj, next_record_state)
    except DuplicateKeyError as e:
        recovered_uow = self.recover_from_duplicatekeyerror(e)
        if recovered_uow is None:
            msg = ('MANUAL INTERVENTION REQUIRED! Unable to identify unit_of_work for %s in %s'
                   % (process_name, time_record.get_timestamp()))
            self._log_message(ERROR, process_name, time_record, msg)
        else:
            self.timetable.update_timetable_record(process_name,
                                                   time_record,
                                                   recovered_uow,
                                                   time_record.get_state())
def test_increment_time(self):
    """Verify time_helper.increment_time for hourly, daily, monthly and yearly processes.

    Every entry pairs a process name with (timestamp, expected next timestamp)
    samples, including roll-overs of day, month and year.
    """
    scenarios = (
        (process_context.PROCESS_SITE_HOURLY, (('2011010100', '2011010101'),
                                               ('2011010112', '2011010113'),
                                               ('2011010123', '2011010200'))),
        (process_context.PROCESS_SITE_DAILY, (('2011010100', '2011010200'),
                                              ('2011013100', '2011020100'),
                                              ('2010123100', '2011010100'))),
        (process_context.PROCESS_SITE_MONTHLY, (('2011010000', '2011020000'),
                                                ('2011120000', '2012010000'),
                                                ('2011100000', '2011110000'))),
        (process_context.PROCESS_SITE_YEARLY, (('2011000000', '2012000000'),
                                               ('2012000000', '2013000000'),
                                               ('2099000000', '2100000000'))),
    )

    for process_name, samples in scenarios:
        for stamp, expected_stamp in samples:
            assert time_helper.increment_time(process_name, stamp) == expected_stamp
def _build_tree(self, rebuild, process_name, method_get_node):
    """Build the tree by visiting every timestamp from the starting point up to now.

    The starting point is ``settings['synergy_start_timestamp']`` (cast to the
    time qualifier of the process) when a rebuild is requested or no build has
    been recorded yet; otherwise it is the timestamp recorded by the previous build.
    ``method_get_node`` is called once per timestamp, the current one included,
    and the current timestamp is then stored in ``self.build_timestamp``.

    :param rebuild: when true, start from the configured synergy start timestamp
    :param process_name: name of the process, forwarded to the time helpers
    :param method_get_node: callable invoked with every visited timestamp
    """
    if not rebuild and self.build_timestamp is not None:
        # continue from the point where the previous build has stopped
        cursor = self.build_timestamp
    else:
        cursor = cast_to_time_qualifier(process_name, settings['synergy_start_timestamp'])

    now = time_helper.datetime_to_synergy(process_name, datetime.utcnow())
    while now >= cursor:
        method_get_node(cursor)
        cursor = time_helper.increment_time(process_name, cursor)

    self.build_timestamp = now
def _process_state_embryo(self, process_name, time_record, start_time):
    """Process a timetable record that is in STATE_EMBRYO.

    Inserts the first unit_of_work (iteration 0) for the period starting at
    ``start_time``. If the insert hits a DuplicateKeyError, the method tries to
    recover the already existing unit_of_work and logs a warning. When a
    unit_of_work is available, the timetable record is moved to STATE_IN_PROGRESS;
    otherwise a warning asking for manual intervention is logged.

    :param process_name: name of the process, forwarded to time_helper and timetable
    :param time_record: timetable record being processed
    :param start_time: start of the period; the end is derived via increment_time
    """
    end_time = time_helper.increment_time(process_name, start_time)

    try:
        uow_obj = self.insert_uow(process_name, start_time, end_time, 0, time_record)
    except DuplicateKeyError as e:
        uow_obj = self.recover_from_duplicatekeyerror(e)
        msg = ('Catching up with latest unit_of_work %s in timeperiod %s, because of: %r'
               % (process_name, time_record.get_timestamp(), e))
        self._log_message(WARNING, process_name, time_record, msg)

    if uow_obj is None:
        # neither insert nor recovery has produced a unit_of_work
        msg = ('MANUAL INTERVENTION REQUIRED! Unable to locate unit_of_work for %s in %s'
               % (process_name, time_record.get_timestamp()))
        self._log_message(WARNING, process_name, time_record, msg)
        return

    self.timetable.update_timetable_record(process_name,
                                           time_record,
                                           uow_obj,
                                           TimeTableEntry.STATE_IN_PROGRESS)
def _process_state_embryo(self, process_name, time_record, start_time):
    """Process a timetable record that is in STATE_EMBRYO.

    Derives the end of the period from ``start_time`` via increment_time and
    delegates the rest to ``_compute_and_transfer_to_progress``.

    :param process_name: name of the process, forwarded to time_helper
    :param time_record: timetable record being processed
    :param start_time: start of the period covered by the record
    """
    self._compute_and_transfer_to_progress(
        process_name,
        start_time,
        time_helper.increment_time(process_name, start_time),
        time_record)