def child_process_ready(self, child_id):
    """
    Emit the signal indicating that a child process is ready to be scheduled.
    :param child_id: ID of the child process that is ready
    :return:
    """
    valve.send(signals,
               'child_process_ready',
               sender=PipelineProcess,
               child_id=child_id)
def test_send_on_valve_is_none(self):
    """When no valve function is installed, send() must deliver immediately."""
    first_kwargs = {'1': 1}
    second_kwargs = {'2': 2}

    valve.unload_valve_function()

    valve.send(mock_signal, 'signal_1', **first_kwargs)
    valve.send(mock_signal, 'signal_1', **second_kwargs)

    # both sends passed straight through, in order
    self.assertEqual(mock_signal.signal_1.history[0], first_kwargs)
    self.assertEqual(mock_signal.signal_1.history[1], second_kwargs)

    mock_signal.clear()
def batch_process_ready(self, process_id_list, pipeline_id):
    """
    Emit the signal that wakes up a batch of processes.
    :param process_id_list: IDs of the processes to be woken up
    :param pipeline_id: root pipeline these processes belong to
    :return:
    """
    valve.send(signals,
               'batch_process_ready',
               sender=PipelineProcess,
               process_id_list=process_id_list,
               pipeline_id=pipeline_id)
def unfreeze(self):
    """
    Unfreeze the current process and emit the 'process_unfreeze' signal.
    :return:
    """
    with transaction.atomic():
        # take a row lock so concurrent freeze/unfreeze calls serialize
        self.__class__.objects.select_for_update().get(id=self.id)
        self.is_frozen = False
        self.save(save_snapshot=False)
        valve.send(signals,
                   'process_unfreeze',
                   sender=PipelineProcess,
                   process_id=self.id)
def callback(self, callback_data, process_id):
    """
    Record callback data for a wait-callback schedule and fire 'schedule_ready'.
    :param callback_data: payload delivered by the external callback
    :param process_id: ID of the process owning this schedule
    :raises exceptions.InvalidOperationException: if this schedule polls instead of waiting for callbacks
    """
    if not self.wait_callback:
        raise exceptions.InvalidOperationException(
            'can\'t callback a poll schedule.')

    self.callback_data = callback_data
    self.save()

    # countdown=0: the schedule should run as soon as possible
    valve.send(signals,
               'schedule_ready',
               sender=ScheduleService,
               process_id=process_id,
               schedule_id=self.id,
               countdown=0)
def set_next_schedule(self):
    """
    Arrange the next polling round for this schedule.
    :raises exceptions.InvalidOperationException: if this schedule waits for callbacks (polling not allowed)
    """
    if self.wait_callback:
        raise exceptions.InvalidOperationException(
            'can\'t set next schedule on callback schedule.')

    # ask the service how long to wait before the next poll
    next_count_down = self.service_act.service.interval.next()

    self.is_scheduling = False
    self.save()

    # release the celery task bound to this schedule before re-queueing
    ScheduleCeleryTask.objects.unbind(self.id)

    valve.send(signals,
               'schedule_ready',
               sender=ScheduleService,
               process_id=self.process_id,
               schedule_id=self.id,
               countdown=next_count_down)
def test_send_on_closed(self):
    """While the valve function reports 'closed', sends must be held back."""
    first_kwargs = {'1': 1}
    second_kwargs = {'2': 2}

    def always_closed():
        return True

    valve.unload_valve_function()
    valve.set_valve_function(always_closed)

    valve.send(mock_signal, 'signal_1', **first_kwargs)
    valve.send(mock_signal, 'signal_1', **second_kwargs)

    # nothing was delivered while the valve stayed closed
    self.assertEqual(len(mock_signal.signal_1.history), 0)

    mock_signal.clear()
    Signal.objects.all().delete()
def test_send_on_valve_opened(self):
    """While the valve function reports 'open', sends must pass through in order."""
    first_kwargs = {'1': 1}
    second_kwargs = {'2': 2}

    def never_closed():
        return False

    valve.unload_valve_function()
    valve.set_valve_function(never_closed)

    valve.send(mock_signal, 'signal_1', **first_kwargs)
    valve.send(mock_signal, 'signal_1', **second_kwargs)

    # both sends were delivered immediately, in order
    self.assertEqual(mock_signal.signal_1.history[0], first_kwargs)
    self.assertEqual(mock_signal.signal_1.history[1], second_kwargs)

    mock_signal.clear()
def process_ready(self, process_id, current_node_id=None, call_from_child=False):
    """
    Emit the signal indicating that a process is ready to be scheduled.
    :param process_id: ID of the process that is ready
    :param current_node_id: ID of the next node to execute (usable for failure skips)
    :param call_from_child: whether the signal was raised by a child process
    :return:
    """
    valve.send(signals,
               'process_ready',
               sender=PipelineProcess,
               process_id=process_id,
               current_node_id=current_node_id,
               call_from_child=call_from_child)
def test_open_valve(self):
    """Opening the valve must flush everything that was held while it was closed."""
    first_kwargs = {'1': 1}
    second_kwargs = {'2': 2}

    def closed():
        return True

    valve.unload_valve_function()
    valve.set_valve_function(closed)

    valve.send(mock_signal, 'signal_1', **first_kwargs)
    valve.send(mock_signal, 'signal_1', **second_kwargs)

    # held back while closed
    self.assertEqual(len(mock_signal.signal_1.history), 0)

    valve.open_valve(mock_signal)

    # flushed in original order once opened
    self.assertEqual(mock_signal.signal_1.history[0], first_kwargs)
    self.assertEqual(mock_signal.signal_1.history[1], second_kwargs)

    mock_signal.clear()
def set_schedule(self, activity_id, service_act, process_id, version, parent_data):
    """
    Create a ScheduleService for an activity and, for polling schedules,
    fire the first 'schedule_ready' signal.
    :param activity_id: ID of the activity being scheduled
    :param service_act: the activity's service shell
    :param process_id: ID of the owning process
    :param version: status version of the activity
    :param parent_data: data object shared with the parent pipeline
    :return: the created schedule instance
    """
    # no polling interval means the schedule waits for an external callback
    needs_callback = service_act.service.interval is None

    schedule = self.create(id="%s%s" % (activity_id, version),
                           activity_id=activity_id,
                           service_act=service_act,
                           process_id=process_id,
                           wait_callback=needs_callback,
                           version=version)
    data_service.set_schedule_data(schedule.id, parent_data)

    if not needs_callback:
        # polling mode: queue the first schedule round right away
        first_count_down = service_act.service.interval.next()
        valve.send(signals,
                   'schedule_ready',
                   sender=ScheduleService,
                   process_id=process_id,
                   schedule_id=schedule.id,
                   countdown=first_count_down)

    return schedule
def pipeline_ready(self, process_id):
    """
    Emit the signal indicating that a pipeline is ready to run.
    :param process_id: ID of the process driving the pipeline
    :return:
    """
    valve.send(signals,
               'pipeline_ready',
               sender=Pipeline,
               process_id=process_id)
def handle(self, process, element, status):
    """
    Execute a service activity node and decide how the process should proceed.

    :param process: the PipelineProcess driving execution
    :param element: the service activity node to execute
    :param status: the node's Status record (carries version and loop count)
    :return: a HandleResult telling the caller the next node and whether to
             return/sleep
    """
    success = False
    exception_occurred = False
    monitoring = False
    version = status.version
    root_pipeline = process.root_pipeline

    # rerun mode
    if status.loop > 1 and not element.on_retry():
        # second or later loop that is not a retry: reset node data and
        # restore context variables for a clean re-execution
        element.prepare_rerun_data()
        process.top_pipeline.context.recover_variable()
    elif element.on_retry():
        element.retry_at_current_exec()

    # set loop to data (exposed to the service as a 0-based loop counter)
    element.data.inputs._loop = status.loop - 1
    element.data.outputs._loop = status.loop - 1

    # pre output extract
    process.top_pipeline.context.extract_output(element)

    # hydrate inputs
    hydrate_node_data(element)

    if element.timeout:
        # start an external timeout watchdog for this execution version
        logger.info('node %s %s start timeout monitor, timeout: %s' %
                    (element.id, version, element.timeout))
        signals.service_activity_timeout_monitor_start.send(
            sender=element.__class__,
            node_id=element.id,
            version=version,
            root_pipeline_id=root_pipeline.id,
            countdown=element.timeout)
        monitoring = True

    # execute service
    try:
        success = element.execute(root_pipeline.data)
    except Exception as e:
        if element.error_ignorable:
            # ignore exception
            success = True
            exception_occurred = True
            element.ignore_error()
        ex_data = traceback.format_exc(e)
        element.data.outputs.ex_data = ex_data
        logger.error(ex_data)

    # process result
    if success is False:
        ex_data = element.data.get_one_of_outputs('ex_data')
        Status.objects.fail(element, ex_data)
        try:
            element.failure_handler(root_pipeline.data)
        except Exception as e:
            # failure handler must never take the engine down with it
            logger.error('failure_handler(%s) failed: %s' %
                         (element.id, traceback.format_exc(e)))

        if monitoring:
            # cancel the timeout watchdog since the node already failed
            signals.service_activity_timeout_monitor_end.send(
                sender=element.__class__,
                node_id=element.id,
                version=version)
            logger.info('node %s %s timeout monitor revoke' %
                        (element.id, version))

        # send activity error signal
        valve.send(signals,
                   'activity_failed',
                   sender=root_pipeline,
                   pipeline_id=root_pipeline.id,
                   pipeline_activity_id=element.id,
                   subprocess_id_stack=process.subprocess_stack)

        return self.HandleResult(next_node=None,
                                 should_return=False,
                                 should_sleep=True)
    else:
        # errors ignored above still count as "failed result" for scheduling
        is_error_ignored = element.error_ignorable and not element.get_result_bit()
        if element.need_schedule() and not exception_occurred and not is_error_ignored:
            # write data before schedule
            Data.objects.write_node_data(element)
            # set schedule
            ScheduleService.objects.set_schedule(
                element.id,
                service_act=element.shell(),
                process_id=process.id,
                version=version,
                parent_data=process.top_pipeline.data)
            # hand over to the schedule loop; this process goes to sleep
            return self.HandleResult(next_node=None,
                                     should_return=True,
                                     should_sleep=True)

        process.top_pipeline.context.extract_output(element)
        error_ignorable = not element.get_result_bit()

        if monitoring:
            # execution is done, cancel the timeout watchdog
            signals.service_activity_timeout_monitor_end.send(
                sender=element.__class__,
                node_id=element.id,
                version=version)
            logger.info('node %s %s timeout monitor revoke' %
                        (element.id, version))

        if not Status.objects.finish(element, error_ignorable):
            # has been forced failed
            return self.HandleResult(next_node=None,
                                     should_return=False,
                                     should_sleep=True)

        return self.HandleResult(next_node=element.next(),
                                 should_return=False,
                                 should_sleep=False)
def schedule(process_id, schedule_id):
    """
    Main entry point of the schedule service.

    Runs one schedule round for a service activity: invokes the service's
    schedule() hook, then either finishes the node, fails it, or queues the
    next polling round.

    :param process_id: PipelineProcess that owns the scheduled node
    :param schedule_id: ID of the ScheduleService record to run
    :return:
    """
    with schedule_exception_handler(process_id, schedule_id):
        ScheduleService.objects.filter(id=schedule_id).update(
            is_scheduling=True)
        sched_service = ScheduleService.objects.get(id=schedule_id)

        service_act = sched_service.service_act
        act_id = sched_service.activity_id
        version = sched_service.version

        if not Status.objects.filter(id=act_id, version=version).exists():
            # forced failed
            logger.warning(
                'schedule service failed, schedule(%s - %s) had been forced exit.'
                % (act_id, version))
            sched_service.destroy()
            return

        # get data
        parent_data = get_schedule_parent_data(sched_service.id)
        if parent_data is None:
            raise exceptions.DataRetrieveError(
                'child process(%s) retrieve parent_data error, sched_id: %s'
                % (process_id, schedule_id))

        # schedule
        ex_data = None
        success = False
        try:
            success = service_act.schedule(parent_data,
                                           sched_service.callback_data)
            # a None return from schedule() is treated as success
            if success is None:
                success = True
        except Exception as e:
            if service_act.error_ignorable:
                # ignorable error: mark success and stop further scheduling
                success = True
                service_act.ignore_error()
                service_act.finish_schedule()
            ex_data = traceback.format_exc(e)
            # FIX: use the module logger (was logging.error), consistent with
            # every other log call in this function
            logger.error(ex_data)

        sched_service.schedule_times += 1
        set_schedule_data(sched_service.id, parent_data)

        # schedule failed
        if not success:
            if not Status.objects.transit(
                    id=act_id, version=version,
                    to_state=states.FAILED).result:
                # forced failed
                logger.warning(
                    'FAILED transit failed, schedule(%s - %s) had been forced exit.'
                    % (act_id, version))
                sched_service.destroy()
                return

            if service_act.timeout:
                # cancel the timeout watchdog for this node version
                signals.service_activity_timeout_monitor_end.send(
                    sender=service_act.__class__,
                    node_id=service_act.id,
                    version=version)
                logger.info('node %s %s timeout monitor revoke' %
                            (service_act.id, version))

            Data.objects.write_node_data(service_act, ex_data=ex_data)

            with transaction.atomic():
                process = PipelineProcess.objects.select_for_update().get(
                    id=sched_service.process_id)
                if not process.is_alive:
                    logger.info(
                        'pipeline %s has been revoked, status adjust failed.'
                        % process.root_pipeline_id)
                    return

                process.adjust_status()

            # send activity error signal
            try:
                service_act.schedule_fail()
            except Exception as e:
                # the fail hook must not break failure propagation
                logger.error('schedule_fail handler fail: %s' %
                             traceback.format_exc(e))

            signals.service_schedule_fail.send(
                sender=ScheduleService,
                activity_shell=service_act,
                schedule_service=sched_service,
                ex_data=ex_data)

            valve.send(signals,
                       'activity_failed',
                       sender=process.root_pipeline,
                       pipeline_id=process.root_pipeline_id,
                       pipeline_activity_id=service_act.id,
                       subprocess_id_stack=process.subprocess_stack)
            return

        # schedule execute finished or callback finished
        if service_act.is_schedule_done() or sched_service.wait_callback:
            error_ignorable = not service_act.get_result_bit()

            if not Status.objects.transit(
                    id=act_id, version=version,
                    to_state=states.FINISHED).result:
                # forced failed
                logger.warning(
                    'FINISHED transit failed, schedule(%s - %s) had been forced exit.'
                    % (act_id, version))
                sched_service.destroy()
                return

            if service_act.timeout:
                # cancel the timeout watchdog for this node version
                signals.service_activity_timeout_monitor_end.send(
                    sender=service_act.__class__,
                    node_id=service_act.id,
                    version=version)
                logger.info('node %s %s timeout monitor revoke' %
                            (service_act.id, version))

            Data.objects.write_node_data(service_act)
            if error_ignorable:
                s = Status.objects.get(id=act_id)
                s.error_ignorable = True
                s.save()

            # sync parent data
            with transaction.atomic():
                process = PipelineProcess.objects.select_for_update().get(
                    id=sched_service.process_id)
                if not process.is_alive:
                    logger.warning('schedule(%s - %s) revoked.' %
                                   (act_id, version))
                    sched_service.destroy()
                    return

                process.top_pipeline.data.update_outputs(
                    parent_data.get_outputs())
                # extract outputs
                process.top_pipeline.context.extract_output(service_act)
                process.save(save_snapshot=True)

            # clear temp data
            delete_parent_data(sched_service.id)
            # save schedule service
            sched_service.finish()

            signals.service_schedule_success.send(
                sender=ScheduleService,
                activity_shell=service_act,
                schedule_service=sched_service)

            valve.send(signals,
                       'wake_from_schedule',
                       sender=ScheduleService,
                       process_id=sched_service.process_id,
                       activity_id=sched_service.activity_id)
        else:
            # not done yet: queue the next polling round
            sched_service.set_next_schedule()