def skip_node(node_id):
    """
    Skip the service activity that a process is currently stopped on.

    :param node_id: id of the node to skip
    :return: ActionResult; ``result`` is False when no process is found on the
             node, the node is not a ServiceActivity, the node declares itself
             not skippable, or the status transition is refused
    """
    try:
        proc = PipelineProcess.objects.get(current_node_id=node_id)
    except PipelineProcess.DoesNotExist:
        # no process has this node as its current node (original note: can not skip subprocess)
        return ActionResult(result=False,
                            message="can't not skip a subprocess or this process has been revoked")

    target = proc.top_pipeline.node(node_id)

    if not isinstance(target, ServiceActivity):
        return ActionResult(result=False, message="can't skip this type of node")

    declares_skippable = hasattr(target, 'skippable')
    if declares_skippable and not target.skippable:
        return ActionResult(result=False,
                            message='this node is set to not be skippable, try retry it please.')

    # skip and write result bit
    skip_result = Status.objects.skip(proc, target)
    if not skip_result.result:
        return skip_result

    following_id = target.next().id

    # extract outputs, persist the process, then wake it up on the following node
    proc.top_pipeline.context.extract_output(target)
    proc.save()
    PipelineProcess.objects.process_ready(process_id=proc.id, current_node_id=following_id)

    return skip_result
def retry_node(node_id, inputs=None):
    """
    Retry the node that a process is currently stopped on.

    :param node_id: id of the node to retry
    :param inputs: optional new inputs, forwarded to ``Status.objects.retry``
    :return: ActionResult; ``result`` is False when no process is found on the
             node, the node is neither a ServiceActivity nor a ParallelGateway,
             the node declares itself not retryable, or the retry is refused
    """
    # Fetch the process exactly once, inside the guarded block. A second,
    # unguarded lookup would cost an extra query and could raise DoesNotExist
    # if the process disappeared between the two calls.
    try:
        process = PipelineProcess.objects.get(current_node_id=node_id)
    except PipelineProcess.DoesNotExist:
        # can not retry subprocess
        return ActionResult(result=False,
                            message='can\'t not retry a subprocess or this process has been revoked')

    node = process.top_pipeline.node(node_id)
    if not isinstance(node, (ServiceActivity, ParallelGateway)):
        return ActionResult(result=False, message='can\'t retry this type of node')

    if hasattr(node, 'retryable') and not node.retryable:
        return ActionResult(result=False,
                            message='the node is set to not be retryable, try skip it please.')

    action_result = Status.objects.retry(process, node, inputs)
    if not action_result.result:
        return action_result

    # wake up process
    PipelineProcess.objects.process_ready(process_id=process.id)

    return action_result
def activity_callback(activity_id, callback_data):
    """
    Deliver callback data to the schedule service of a node.

    :param activity_id: id of the node being called back
    :param callback_data: payload forwarded to ``service.callback``
    :return: ActionResult; ``result`` is False when no process is currently on
             the node
    :raises ScheduleService.DoesNotExist: when the schedule is still missing
            after three lookups
    :raises exceptions.InvalidOperationException: when the schedule has already
            finished
    """
    version = Status.objects.version_for(activity_id)
    max_attempts = 3
    attempt = 0

    # it's possible that ScheduleService has not been set yet when the callback arrives
    while True:
        try:
            service = ScheduleService.objects.schedule_for(activity_id, version)
            break
        except ScheduleService.DoesNotExist:
            attempt += 1
            if attempt >= max_attempts:
                # Give up immediately: sleeping before the final raise only
                # delays the caller. Bare ``raise`` keeps the original traceback.
                raise
            # back off a little longer after each miss (1s, then 2s)
            time.sleep(attempt)

    try:
        process_id = PipelineProcess.objects.get(current_node_id=activity_id).id
    except PipelineProcess.DoesNotExist:
        return ActionResult(result=False,
                            message='invalid operation, this node is finished or pipeline have been revoked')

    if service.is_finished:
        raise exceptions.InvalidOperationException('activity(%s) callback already finished' % activity_id)

    service.callback(callback_data, process_id)
    return ActionResult(result=True, message='success')
def skip_exclusive_gateway(node_id, flow_id):
    """
    Skip a failed exclusive gateway and continue along an appointed flow.

    :param node_id: id of the exclusive gateway
    :param flow_id: id of the sequence flow whose target becomes the next node
    :return: ActionResult; ``result`` is False when no process is found on the
             node, the node is not an ExclusiveGateway, or the skip is refused
    """
    try:
        proc = PipelineProcess.objects.get(current_node_id=node_id)
    except PipelineProcess.DoesNotExist:
        return ActionResult(result=False,
                            message='invalid operation, this gateway is finished or pipeline have been revoked')

    gateway = proc.top_pipeline.node(node_id)
    if not isinstance(gateway, ExclusiveGateway):
        return ActionResult(result=False,
                            message='invalid operation, this node is not a exclusive gateway')

    # the target node is resolved before the status of the gateway is touched
    flow_target = gateway.target_for_sequence_flow(flow_id)
    next_node_id = flow_target.id

    skip_result = Status.objects.skip(proc, gateway)
    if skip_result.result:
        # wake up process
        PipelineProcess.objects.process_ready(process_id=proc.id, current_node_id=next_node_id)

    return skip_result
def resume_node_appointment(node_id):
    """
    Make an appointment to resume a paused node or subprocess.

    Two cases are handled in order:

    1. A sleeping process whose current node is ``node_id``: the node is
       transited to READY and that process is woken up.
    2. Processes related to ``node_id`` as a subprocess: the subprocess is
       transited to RUNNING and every related process that reports
       ``can_be_waked()`` is woken up.

    :param node_id: id of the node (or subprocess) to resume
    :return: ActionResult; ``result`` is False when neither case matches or
             the state transition is refused
    """
    qs = PipelineProcess.objects.filter(current_node_id=node_id, is_sleep=True)
    if qs.exists():
        # a process had sleep caused by pause reservation
        action_result = Status.objects.transit(id=node_id, to_state=states.READY, appoint=True)
        if not action_result.result:
            return action_result

        process = qs.first()
        Status.objects.recover_from_block(process.root_pipeline.id, process.subprocess_stack)
        PipelineProcess.objects.process_ready(process_id=process.id)
        return ActionResult(result=True, message='success')

    processing_sleep = SubProcessRelationship.objects.get_relate_process(subprocess_id=node_id)
    if processing_sleep.exists():
        action_result = Status.objects.transit(id=node_id, to_state=states.RUNNING, appoint=True,
                                               is_pipeline=True)
        if not action_result.result:
            return action_result

        # processes had sleep caused by subprocess pause
        root_pipeline_id = processing_sleep.first().root_pipeline_id

        # Materialise as lists: the wakeable processes are walked twice below,
        # and lazy filter()/map() iterators (Python 3) can only be consumed once.
        process_can_be_waked = [p for p in processing_sleep if p.can_be_waked()]
        can_be_waked_ids = [p.id for p in process_can_be_waked]

        # collect the ids of the subprocesses which should be transited
        subprocess_to_be_transit = set()
        for process in process_can_be_waked:
            _, subproc_above = process.subproc_sleep_check()
            subprocess_to_be_transit.update(subproc_above)

        Status.objects.recover_from_block(root_pipeline_id, subprocess_to_be_transit)
        PipelineProcess.objects.batch_process_ready(process_id_list=can_be_waked_ids,
                                                    pipeline_id=root_pipeline_id)
        return ActionResult(result=True, message='success')

    return ActionResult(result=False, message='node not exists or not be executed yet')
def start(self, executor, check_workers=True):
    """
    Start the current pipeline instance.

    The instance row is locked and marked as started inside one transaction;
    the pipeline is then handed to ``task_service.run_pipeline``. When that
    call reports failure the started markers are reset in a second
    transaction.

    @param executor: the executor recorded on the instance
    @param check_workers: whether worker status should be checked
                          (not read inside this body)
    @return: ActionResult of the start attempt
    """
    model = self.__class__

    with transaction.atomic():
        locked = model.objects.select_for_update().get(id=self.id)
        if locked.is_started:
            return ActionResult(result=False, message='pipeline instance already started.')

        tree = locked.execution_data

        try:
            parser_cls = import_string(settings.PIPELINE_PARSER_CLASS)
        except ImportError:
            return ActionResult(result=False,
                                message='invalid parser class: %s' % settings.PIPELINE_PARSER_CLASS)

        locked.start_time = timezone.now()
        locked.is_started = True
        locked.executor = executor

        parser = parser_cls(tree)
        root_data = get_pipeline_context(locked, obj_type='instance', data_type='data')
        root_context = get_pipeline_context(locked, obj_type='instance', data_type='context')
        pipeline = parser.parse(root_pipeline_data=root_data,
                                root_pipeline_context=root_context)

        # calculate tree info
        locked.calculate_tree_info()

        locked.save()

    run_result = task_service.run_pipeline(pipeline)
    if run_result.result:
        return run_result

    # the engine refused the pipeline: undo the started markers
    with transaction.atomic():
        locked = model.objects.select_for_update().get(id=self.id)
        locked.start_time = None
        locked.is_started = False
        locked.executor = ''
        locked.save()

    return run_result
def wrapper(*args, **kwargs):
    # The worker check is only consulted through the ``check_workers`` keyword
    # (default True); a positional value is not inspected here.
    if not kwargs.get('check_workers', True):
        return func(*args, **kwargs)

    try:
        alive_workers = workers()
    except exceptions.RabbitMQConnectionError as e:
        return ActionResult(result=False,
                            message='celery worker status check failed with message: %s, '
                                    'check rabbitmq status please' % e.message)
    except RedisConnectionError:
        return ActionResult(result=False,
                            message='redis connection error, check redis status please')

    if not alive_workers:
        return ActionResult(result=False,
                            message='can not find celery workers, please check worker status')

    return func(*args, **kwargs)
def skip(self, process, node):
    """
    Mark a node as skipped: transit it to FINISHED, record history, flag the
    status as skipped and persist the node data.

    :param process: the process currently on the node
    :param node: the node being skipped
    :return: ActionResult of the state transition (or a failure result when
             the rerun limit is exceeded)
    """
    # must be fetched before the transition, otherwise archive time would be overwritten
    snapshot = Status.objects.get(id=node.id)

    limit_enabled = RERUN_MAX_LIMIT != 0
    if limit_enabled and snapshot.loop > RERUN_MAX_LIMIT:
        return ActionResult(
            result=False,
            message='rerun times exceed max limit: {limit}, can not skip'.format(limit=RERUN_MAX_LIMIT))

    transit_result = self.transit(id=node.id, to_state=states.FINISHED, appoint=True)
    if not transit_result.result:
        return transit_result

    # history is recorded from the pre-transition snapshot
    record = History.objects.record(snapshot)
    LogEntry.objects.link_history(node_id=node.id, history_id=record.id)

    snapshot.refresh_from_db()
    snapshot.started_time = snapshot.archived_time
    snapshot.skip = True
    snapshot.save()

    # the node failed, so its data may not have been written to the current
    # Data object yet; write it here
    node.skip()
    Data.objects.write_node_data(node)

    self.recover_from_block(process.root_pipeline.id, process.subprocess_stack)
    signals.node_skip_call.send(sender=Status, process=process, node=node)

    return transit_result
def retry(self, process, node, inputs):
    """
    Prepare a node for retry: transit it to READY, record history, bump the
    retry counter and optionally replace the node inputs.

    :param process: the process currently on the node
    :param node: the node being retried
    :param inputs: new inputs for the node; ignored when falsy
    :return: ActionResult of the state transition (or a failure result when
             the rerun limit is exceeded)
    """
    limit_enabled = RERUN_MAX_LIMIT != 0
    if limit_enabled and self.get(id=node.id).loop > RERUN_MAX_LIMIT:
        return ActionResult(
            result=False,
            message='rerun times exceed max limit: {limit}, can not retry'.format(limit=RERUN_MAX_LIMIT))

    transit_result = self.transit(id=node.id, to_state=states.READY, appoint=True)
    if not transit_result.result:
        return transit_result

    # add retry times; the version is refreshed before the history record is taken
    current = Status.objects.get(id=node.id)
    current.version = uniqid()
    record = History.objects.record(current)
    LogEntry.objects.link_history(node_id=node.id, history_id=record.id)
    current.retry += 1
    current.save()

    # update inputs
    if inputs:
        node.data = DataObject(inputs=inputs, outputs={})
        Data.objects.write_node_data(node)

    # mark
    node.next_exec_is_retry()

    self.recover_from_block(process.root_pipeline.id, process.subprocess_stack)
    signals.node_retry_ready.send(sender=Status, process=process, node=node)

    # because node may be updated
    process.save()

    return transit_result
def revoke_pipeline(pipeline_id):
    """
    Revoke a pipeline: transit it to REVOKED, then revoke its subprocesses and
    destroy the related processes.

    :param pipeline_id: id of the pipeline to revoke
    :return: ActionResult of the state transition, or a failure result when
             the pipeline has no related process
    """
    transit_result = Status.objects.transit(id=pipeline_id,
                                            to_state=states.REVOKED,
                                            is_pipeline=True,
                                            appoint=True)
    if not transit_result.result:
        return transit_result

    related = PipelineModel.objects.get(id=pipeline_id).process
    if not related:
        return ActionResult(result=False,
                            message='relate process is none, this pipeline may be revoked.')

    with transaction.atomic():
        # select_for_update locks the process row; the fetched object itself is not used
        PipelineProcess.objects.select_for_update().get(id=related.id)
        related.revoke_subprocess()
        related.destroy_all()

    return transit_result
def wrapper(*args, **kwargs):
    # Refuse the wrapped operation while the engine reports itself as frozen.
    engine_frozen = FunctionSwitch.objects.is_frozen()
    if not engine_frozen:
        return func(*args, **kwargs)

    return ActionResult(result=False,
                        message='engine is frozen, can not perform operation')
def wrapper(*args, **kwargs):
    # The node id is taken from the ``node_id`` keyword when it is truthy,
    # otherwise from the first positional argument.
    node_id = kwargs.get('node_id') or args[0]

    try:
        Status.objects.get(id=node_id)
    except Status.DoesNotExist:
        # no status row for this id: refuse before calling the wrapped function
        return ActionResult(result=False, message='node not exists or not be executed yet')

    return func(*args, **kwargs)
def forced_fail(node_id, kill=False, ex_data=''):
    """
    Force a running service activity into the FAILED state.

    :param node_id: id of the node to fail
    :param kill: forwarded to ``ProcessCeleryTask.objects.revoke``
    :param ex_data: forwarded to ``Data.objects.forced_fail``
    :return: ActionResult; ``result`` is False when no process is found on the
             node, the node is not a ServiceActivity, or the transition to
             FAILED is refused
    """
    import logging

    try:
        process = PipelineProcess.objects.get(current_node_id=node_id)
    except PipelineProcess.DoesNotExist:
        return ActionResult(result=False,
                            message='invalid operation, this node is finished or pipeline have been revoked')

    node = process.top_pipeline.node(node_id)
    if not isinstance(node, ServiceActivity):
        return ActionResult(result=False, message='can\'t not forced fail this type of node')

    action_result = Status.objects.transit(node_id, to_state=states.FAILED)
    if not action_result.result:
        return action_result

    try:
        node.failure_handler(process.root_pipeline.data)
    except Exception:
        # The handler stays best-effort (a failure here must not abort the
        # forced fail), but the error is logged instead of silently dropped.
        logging.getLogger(__name__).exception(
            'failure_handler of node(%s) raised during forced fail', node_id)

    with transaction.atomic():
        s = Status.objects.get(id=node.id)
        # the schedule is deleted using the version read here, before a new one is assigned below
        ScheduleService.objects.delete_schedule(s.id, s.version)
        Data.objects.forced_fail(node_id, ex_data)
        ProcessCeleryTask.objects.revoke(process.id, kill)
        process.sleep(adjust_status=True)
        s.version = uniqid.uniqid()
        s.save()

    return ActionResult(result=True, message='success')
def start_pipeline(pipeline_instance, check_workers=True):
    """
    Prepare status, process and model records for a pipeline and mark it ready.

    :param pipeline_instance: the pipeline object to start
    :param check_workers: not read inside this body
                          (presumably consumed by a decorator - confirm)
    :return: ActionResult with ``result=True``
    """
    Status.objects.prepare_for_pipeline(pipeline_instance)

    root_process = PipelineProcess.objects.prepare_for_pipeline(pipeline_instance)
    PipelineModel.objects.prepare_for_pipeline(pipeline_instance, root_process)
    PipelineModel.objects.pipeline_ready(process_id=root_process.id)

    return ActionResult(result=True, message='success')
def start_pipeline(pipeline_instance, check_workers=True, priority=PIPELINE_DEFAULT_PRIORITY):
    """
    Prepare status, process and model records for a pipeline and mark it ready.

    :param pipeline_instance: the pipeline object to start
    :param check_workers: not read inside this body
                          (presumably consumed by a decorator - confirm)
    :param priority: pipeline priority, must lie within
                     [PIPELINE_MIN_PRIORITY, PIPELINE_MAX_PRIORITY]
    :return: ActionResult with ``result=True``
    :raises exceptions.InvalidOperationException: when ``priority`` is out of range
    """
    above_max = priority > PIPELINE_MAX_PRIORITY
    if above_max or priority < PIPELINE_MIN_PRIORITY:
        message = 'pipeline priority must between [{min}, {max}]'.format(
            min=PIPELINE_MIN_PRIORITY,
            max=PIPELINE_MAX_PRIORITY,
        )
        raise exceptions.InvalidOperationException(message)

    Status.objects.prepare_for_pipeline(pipeline_instance)

    root_process = PipelineProcess.objects.prepare_for_pipeline(pipeline_instance)
    PipelineModel.objects.prepare_for_pipeline(pipeline_instance, root_process, priority)
    PipelineModel.objects.pipeline_ready(process_id=root_process.id)

    return ActionResult(result=True, message='success')
def start(self, executor, check_workers=True):
    """
    Start the current pipeline instance.

    @param executor: the executor recorded on the instance
    @param check_workers: whether to check the worker status before starting
    @return: ActionResult of the start attempt
    """
    from pipeline.engine import api
    from pipeline.utils.context import get_pipeline_context
    from pipeline.engine.models import FunctionSwitch
    from pipeline.engine.core.api import workers

    if FunctionSwitch.objects.is_frozen():
        return ActionResult(result=False, message='engine has been freeze, try later please')

    if check_workers:
        try:
            alive_workers = workers()
        except RabbitMQConnectionError as e:
            return ActionResult(result=False,
                                message='celery worker status check failed with message: %s, '
                                        'check rabbitmq status please' % e.message)
        except RedisConnectionError:
            return ActionResult(result=False,
                                message='redis connection error, check redis status please')

        if not alive_workers:
            return ActionResult(result=False,
                                message='can not find celery workers, please check worker status')

    with transaction.atomic():
        locked = self.__class__.objects.select_for_update().get(id=self.id)
        if locked.is_started:
            return ActionResult(result=False, message='pipeline instance already started.')

        locked.start_time = timezone.now()
        locked.is_started = True
        locked.executor = executor

        # calculate tree info
        locked.calculate_tree_info()

        tree = locked.execution_data

        try:
            parser_cls = import_string(settings.PIPELINE_PARSER_CLASS)
        except ImportError:
            # returns before save(), so the started markers set above are not persisted
            return ActionResult(result=False,
                                message='invalid parser class: %s' % settings.PIPELINE_PARSER_CLASS)

        context = get_pipeline_context(locked, 'instance')
        pipeline = parser_cls(tree).parse(context)

        locked.save()

    return api.start_pipeline(pipeline, check_workers=check_workers)
def transit(self, id, to_state, is_pipeline=False, appoint=False, start=False, name='', version=None,
            unchanged_pass=False):
    """
    Try to change the state of a node.

    :param id: node ID
    :param to_state: target state
    :param is_pipeline: whether the node is a pipeline
    :param appoint: whether the action is initiated by a user (not an engine-internal operation)
    :param start: whether to refresh the started time
    :param name: node name
    :param version: node version
    :param unchanged_pass: treat the operation as successful when to_state equals the current state
    :return: ActionResult; ``extra`` carries the status object when one was loaded
    """
    creation_defaults = {'name': name, 'state': to_state, 'version': uniqid()}
    if start:
        creation_defaults['started_time'] = timezone.now()

    status, created = self.get_or_create(id=id, defaults=creation_defaults)

    # reservation or first creation
    if created:
        return ActionResult(result=True, message='success', extra=status)

    with transaction.atomic():
        lookup = {'id': id}
        if version:
            lookup['version'] = version

        try:
            status = self.select_for_update().get(**lookup)
        except Status.DoesNotExist:
            return ActionResult(result=False, message='node not exists or not be executed yet')

        if unchanged_pass and status.state == to_state:
            return ActionResult(result=True, message='success', extra=status)

        allowed = states.can_transit(from_state=status.state,
                                     to_state=to_state,
                                     is_pipeline=is_pipeline,
                                     appoint=appoint)
        if not allowed:
            return ActionResult(
                result=False,
                message="can't transit state(%s) from %s to %s" % (id, status.state, to_state),
                extra=status)

        # the state of a pipeline can not be changed while it is frozen
        if is_pipeline:
            relations = SubProcessRelationship.objects.filter(subprocess_id=id)
            if relations:
                owner = PipelineProcess.objects.get(id=relations[0].process_id)
                if owner.is_frozen:
                    return ActionResult(result=False,
                                        message='engine is frozen, can not perform operation')

            root_processes = PipelineProcess.objects.filter(root_pipeline_id=id)
            if root_processes and root_processes[0].is_frozen:
                return ActionResult(result=False,
                                    message='engine is frozen, can not perform operation')

        if name:
            status.name = name
        if start:
            status.started_time = timezone.now()
        if to_state in states.ARCHIVED_STATES:
            status.archived_time = timezone.now()

        # from FINISHED to RUNNING
        if states.is_rerunning(from_state=status.state, to_state=to_state):
            record = History.objects.record(status, is_rerunning=True)
            if record:
                LogEntry.objects.link_history(node_id=status.id, history_id=record.id)
            status.loop += 1
            status.skip = False
            status.version = uniqid()

        status.state = to_state
        status.save()
        return ActionResult(result=True, message='success', extra=status)
class TestPipelineInstance(TestCase):
    """Tests for PipelineInstance creation, state flags, deletion and start()."""

    def setUp(self):
        # Minimal tree: start event -> one sleep_timer activity -> end event.
        self.data = {
            u'activities': {
                u'node8fe2bb234d29860981a2bc7e6077': {
                    u'retryable': True,
                    u'component': {
                        u'code': u'sleep_timer',
                        u'data': {
                            u'bk_timing': {
                                u'hook': False,
                                u'value': u'3'
                            }
                        }
                    },
                    u'error_ignorable': False,
                    u'id': u'node8fe2bb234d29860981a2bc7e6077',
                    u'incoming': u'line67b0e8cc895b1b9f9e0413dc50d1',
                    u'isSkipped': True,
                    u'loop': None,
                    u'name': u'\u5b9a\u65f6',
                    u'optional': False,
                    u'outgoing': u'line73943da9f6f17601a40dc46bd229',
                    u'stage_name': u'\u6b65\u9aa41',
                    u'type': u'ServiceActivity'
                }
            },
            u'constants': {
                u'${ip}': {
                    u'custom_type': u'input',
                    u'desc': u'',
                    u'index': 0,
                    u'key': u'${ip}',
                    u'name': u'ip',
                    u'show_type': u'show',
                    u'source_info': {},
                    u'source_tag': u'',
                    u'source_type': u'custom',
                    u'validation': u'^.+$',
                    u'validator': [],
                    u'value': u''
                }
            },
            u'end_event': {
                u'id': u'nodeade2061fe6e69dc5b64a588480a7',
                u'incoming': u'line73943da9f6f17601a40dc46bd229',
                u'name': u'',
                u'outgoing': u'',
                u'type': u'EmptyEndEvent'
            },
            u'flows': {
                u'line67b0e8cc895b1b9f9e0413dc50d1': {
                    u'id': u'line67b0e8cc895b1b9f9e0413dc50d1',
                    u'is_default': False,
                    u'source': u'nodedee24d10226c975f4d2c659cc29d',
                    u'target': u'node8fe2bb234d29860981a2bc7e6077'
                },
                u'line73943da9f6f17601a40dc46bd229': {
                    u'id': u'line73943da9f6f17601a40dc46bd229',
                    u'is_default': False,
                    u'source': u'node8fe2bb234d29860981a2bc7e6077',
                    u'target': u'nodeade2061fe6e69dc5b64a588480a7'
                }
            },
            u'gateways': {},
            u'outputs': [],
            u'start_event': {
                u'id': u'nodedee24d10226c975f4d2c659cc29d',
                u'incoming': u'',
                u'name': u'',
                u'outgoing': u'line67b0e8cc895b1b9f9e0413dc50d1',
                u'type': u'EmptyStartEvent'
            }
        }
        self.creator = 'start'
        self.template = PipelineTemplate.objects.create_model(
            self.data, creator=self.creator, template_id='1')
        self.instance = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator, instance_id='1')
        self.instance_2 = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator, instance_id='2')
        self.instance_3 = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator, instance_id='3')

    @mock.patch('pipeline.models.PipelineTemplate.objects.unfold_subprocess', mock.MagicMock())
    def test_create_instance(self):
        creator = self.creator
        instance = self.instance
        self.assertIsNotNone(instance.snapshot)
        self.assertEqual(instance.snapshot.data, instance.data)
        self.assertEqual(creator, instance.creator)
        self.assertFalse(instance.is_started)
        self.assertFalse(instance.is_finished)
        self.assertFalse(instance.is_deleted)

        # test spread: subprocesses are unfolded unless spread=True is passed
        PipelineInstance.objects.create_instance(self.template,
                                                 exec_data=self.data,
                                                 creator=self.creator,
                                                 instance_id='1')
        PipelineTemplate.objects.unfold_subprocess.assert_called_with(self.data)

        PipelineTemplate.objects.unfold_subprocess.reset_mock()

        PipelineInstance.objects.create_instance(self.template,
                                                 exec_data=self.data,
                                                 creator=self.creator,
                                                 instance_id='1',
                                                 spread=True)
        PipelineTemplate.objects.unfold_subprocess.assert_not_called()

    def test_create_instance__without_template(self):
        self.instance_4 = PipelineInstance.objects.create_instance(
            template=None, exec_data=self.data, creator=self.creator, instance_id='4')
        self.assertIsNone(self.instance_4.template)
        self.assertIsNone(self.instance_4.snapshot)
        self.assertIsNotNone(self.instance_4.execution_snapshot)

    def test_set_started(self):
        PipelineInstance.objects.set_started(self.instance.instance_id, self.creator)
        self.instance.refresh_from_db()
        self.assertTrue(self.instance.is_started)

    def test_set_finished(self):
        # Build FINISHED status rows and relationships for the pipeline and every node in it.
        NodeRelationship.objects.build_relationship(self.instance.instance_id,
                                                    self.instance.instance_id)
        Status.objects.create(id=self.instance.instance_id, state=states.FINISHED)
        for act_id in self.data[u'activities']:
            NodeRelationship.objects.build_relationship(self.instance.instance_id, act_id)
            Status.objects.create(id=act_id, state=states.FINISHED)
        NodeRelationship.objects.build_relationship(self.instance.instance_id,
                                                    self.data[u'start_event']['id'])
        Status.objects.create(id=self.data[u'start_event']['id'], state=states.FINISHED)
        NodeRelationship.objects.build_relationship(self.instance.instance_id,
                                                    self.data[u'end_event']['id'])
        Status.objects.create(id=self.data[u'end_event']['id'], state=states.FINISHED)

        PipelineInstance.objects.set_finished(self.instance.instance_id)

        self.instance.refresh_from_db()
        self.assertTrue(self.instance.is_finished)

    def test_delete_instance(self):
        # delete_model is exercised with a single id and with a list of ids
        PipelineInstance.objects.delete_model(self.instance.instance_id)
        i = PipelineInstance.objects.get(instance_id=self.instance.instance_id)
        self.assertTrue(i.is_deleted)
        PipelineInstance.objects.delete_model(
            [self.instance_2.instance_id, self.instance_3.instance_id])
        i2 = PipelineInstance.objects.get(instance_id=self.instance_2.instance_id)
        i3 = PipelineInstance.objects.get(instance_id=self.instance_3.instance_id)
        self.assertTrue(i2.is_deleted)
        self.assertTrue(i3.is_deleted)

    @patch(PIPELINE_MODELS_TASK_SERVICE_RUN_PIPELINE,
           MagicMock(return_value=ActionResult(result=True, message='')))
    @patch(PIPELINE_PIPELINE_INSTANCE_CALCULATE_TREE_INFO, MagicMock())
    @patch(PIPELINE_PIPELINE_INSTANCE_IMPORT_STRING, MagicMock(return_value=MockParser))
    def test_start__success(self):
        instance = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator)
        executor = 'token_1'
        instance.start(executor)

        instance.refresh_from_db()

        instance.calculate_tree_info.assert_called_once()
        self.assertTrue(instance.is_started)
        self.assertEqual(instance.executor, executor)
        self.assertIsNotNone(instance.start_time)

        task_service.run_pipeline.assert_called_once()

    @patch(PIPELINE_MODELS_TASK_SERVICE_RUN_PIPELINE,
           MagicMock(return_value=ActionResult(result=False, message='')))
    @patch(PIPELINE_PIPELINE_INSTANCE_CALCULATE_TREE_INFO, MagicMock())
    def test_start__already_started(self):
        instance = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator)
        instance.is_started = True
        instance.save()
        executor = 'token_1'

        instance.start(executor)

        instance.calculate_tree_info.assert_not_called()
        task_service.run_pipeline.assert_not_called()

    @patch(PIPELINE_MODELS_TASK_SERVICE_RUN_PIPELINE,
           MagicMock(return_value=ActionResult(result=False, message='')))
    @patch(PIPELINE_PIPELINE_INSTANCE_CALCULATE_TREE_INFO, MagicMock())
    @patch(PIPELINE_PIPELINE_INSTANCE_IMPORT_STRING, MagicMock(side_effect=ImportError()))
    def test_start__parser_cls_error(self):
        instance = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator)
        executor = 'token_1'

        instance.start(executor)

        instance.refresh_from_db()

        self.assertFalse(instance.is_started)
        self.assertEqual(instance.executor, '')
        self.assertIsNone(instance.start_time)

        instance.calculate_tree_info.assert_not_called()
        task_service.run_pipeline.assert_not_called()

    @patch(PIPELINE_MODELS_TASK_SERVICE_RUN_PIPELINE,
           MagicMock(return_value=ActionResult(result=False, message='')))
    @patch(PIPELINE_PIPELINE_INSTANCE_CALCULATE_TREE_INFO, MagicMock())
    @patch(PIPELINE_PIPELINE_INSTANCE_IMPORT_STRING, MagicMock(return_value=MockParser))
    def test_start__task_service_call_fail(self):
        instance = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator)
        executor = 'token_1'
        instance.start(executor)

        instance.refresh_from_db()

        instance.calculate_tree_info.assert_called_once()
        task_service.run_pipeline.assert_called_once()

        # a failed run_pipeline call must leave the instance un-started
        self.assertFalse(instance.is_started)
        self.assertEqual(instance.executor, '')
        self.assertIsNone(instance.start_time)

    @patch(PIPELINE_MODELS_TASK_SERVICE_RUN_PIPELINE,
           MagicMock(return_value=ActionResult(result=False, message='')))
    @patch(PIPELINE_PIPELINE_INSTANCE_CALCULATE_TREE_INFO, MagicMock(side_effect=Exception()))
    def test_start__error_occurred_before_task_service_call(self):
        instance = PipelineInstance.objects.create_instance(
            self.template, exec_data=self.data, creator=self.creator)
        executor = 'token_1'

        # the exception raised by calculate_tree_info is expected here; only
        # the persisted state afterwards is under test
        try:
            instance.start(executor)
        except Exception:
            pass

        instance.refresh_from_db()

        self.assertFalse(instance.is_started)
        self.assertEqual(instance.executor, '')
        self.assertIsNone(instance.start_time)

        task_service.run_pipeline.assert_not_called()