def get_status_dict_with_children(s, children):
    """Serialize a Status object into a plain dict and attach its children map."""
    # plain attributes copied straight off the status object
    plain_fields = (
        'archived_time', 'created_time', 'error_ignorable', 'id', 'loop',
        'name', 'retry', 'skip', 'started_time', 'state', 'version',
    )
    status_dict = {field: getattr(s, field) for field in plain_fields}
    # derived / attached fields
    status_dict['elapsed_time'] = calculate_elapsed_time(s.started_time, s.archived_time)
    status_dict['children'] = children
    return status_dict
def test_migrate_instance(self):
    """migrate_instance should purge the old statistics row and recreate it."""
    lower_bound = self.instance_in_pipeline.id - 1
    upper_bound = self.instance_in_pipeline.id + 1
    outcome = migrate_instance(lower_bound, upper_bound)
    # the exact kwargs the migration is expected to create the row with
    expected_kwargs = dict(
        instance_id=self.pipeline_instance.id,
        task_instance_id=self.taskflow_instance.id,
        atom_total=self.instance_in_pipeline.atom_total,
        subprocess_total=self.instance_in_pipeline.subprocess_total,
        gateways_total=self.instance_in_pipeline.gateways_total,
        project_id=self.taskflow_instance.project.id,
        category=self.task_template.category,
        template_id=self.pipeline_template.id,
        task_template_id=self.task_template.id,
        creator=self.pipeline_instance.creator,
        create_time=self.pipeline_instance.create_time,
        start_time=self.pipeline_instance.start_time,
        finish_time=self.pipeline_instance.finish_time,
        elapsed_time=calculate_elapsed_time(
            self.pipeline_instance.start_time,
            self.pipeline_instance.finish_time),
        create_method=self.taskflow_instance.create_method,
    )
    TaskflowStatistics.objects.filter.assert_called_once_with(
        instance_id=expected_kwargs["instance_id"])
    TaskflowStatistics.objects.create.assert_called_once_with(**expected_kwargs)
    self.assertTrue(outcome)
def get_status_tree(node_id, max_depth=1):
    """
    get state and children states for a node

    :param node_id: id of the root node to fetch
    :param max_depth: how many relationship levels below the root to include
    :return: status dict of the root node, children nested under 'children';
             returns None if a parent status row is missing (original bail-out kept)
    :raise exceptions.InvalidOperationException: node has no relationship records
    """
    rel_qs = NodeRelationship.objects.filter(ancestor_id=node_id, distance__lte=max_depth)
    if not rel_qs.exists():
        raise exceptions.InvalidOperationException(
            'node(%s) does not exist, may have not by executed' % node_id)

    # BUGFIX: materialize to a list — on Python 3 map() returns an iterator,
    # which has no .remove() method (the old code raised AttributeError here).
    descendants = [rel.descendant_id for rel in rel_qs]
    # remove root node
    descendants.remove(node_id)
    rel_qs = NodeRelationship.objects.filter(descendant_id__in=descendants, distance=1)
    targets = [rel.descendant_id for rel in rel_qs]

    root_status = Status.objects.filter(id=node_id).values().first()
    root_status['elapsed_time'] = calculate_elapsed_time(
        root_status['started_time'], root_status['archived_time'])
    status_map = {node_id: root_status}
    status_qs = Status.objects.filter(id__in=targets).values()
    for status in status_qs:
        status['elapsed_time'] = calculate_elapsed_time(
            status['started_time'], status['archived_time'])
        status_map[status['id']] = status

    relationships = [(s.ancestor_id, s.descendant_id) for s in rel_qs]
    for (parent_id, child_id) in relationships:
        # a parent status row may not have been written yet; keep the
        # original behavior of bailing out (implicitly returning None)
        if parent_id not in status_map:
            return
        parent_status = status_map[parent_id]
        child_status = status_map[child_id]
        child_status.setdefault('children', {})
        parent_status.setdefault('children', {}).setdefault(child_id, child_status)

    return status_map[node_id]
def _format_status_time(status_tree):
    """Normalize time fields of a status-tree node in place.

    Drops 'created_time', replaces 'started_time'/'archived_time' with
    formatted 'start_time'/'finish_time', and fills 'elapsed_time' when absent.
    """
    status_tree.setdefault("children", {})
    status_tree.pop("created_time", "")
    begin = status_tree.pop("started_time", None)
    end = status_tree.pop("archived_time", None)
    # keep a pre-computed elapsed_time if the caller already supplied one
    if "elapsed_time" not in status_tree:
        status_tree["elapsed_time"] = calculate_elapsed_time(begin, end)
    status_tree["start_time"] = None
    status_tree["finish_time"] = None
    if begin:
        status_tree["start_time"] = format_datetime(begin)
    if end:
        status_tree["finish_time"] = format_datetime(end)
def get_histories(self, identifier):
    """Return execution history entries for *identifier*, ordered oldest first."""
    records = self.model.objects.filter(identifier=identifier).order_by('started_time')
    histories = []
    for record in records:
        histories.append({
            'started_time': record.started_time,
            'archived_time': record.archived_time,
            'elapsed_time': calculate_elapsed_time(record.started_time, record.archived_time),
            'inputs': record.data.inputs,
            'outputs': record.data.outputs,
            'ex_data': record.data.ex_data,
            'loop': record.loop,
            'skip': record.skip,
        })
    return histories
def taskflowinstance_post_save_statistics_task(task_instance_id, created):
    """Create or refresh the TaskflowStatistics row for a saved TaskFlowInstance.

    :param task_instance_id: primary key of the TaskFlowInstance that was saved
    :param created: True when the instance was just created, False on update
    :return: True on success, False when any exception occurred (logged)
    """
    try:
        taskflow_instance = TaskFlowInstance.objects.get(id=task_instance_id)
        # pipeline data
        pipeline_instance = taskflow_instance.pipeline_instance
        # template data
        task_template = TaskTemplate.objects.get(
            id=taskflow_instance.template_id)
        kwargs = {
            "instance_id": pipeline_instance.id,
            "project_id": taskflow_instance.project.id,
            "category": task_template.category,
            "template_id": task_template.pipeline_template.id,
            "task_template_id": task_template.id,
            "creator": pipeline_instance.creator,
            "create_time": pipeline_instance.create_time,
            "start_time": pipeline_instance.start_time,
            "finish_time": pipeline_instance.finish_time,
            "elapsed_time": calculate_elapsed_time(pipeline_instance.start_time,
                                                   pipeline_instance.finish_time),
            "create_method": taskflow_instance.create_method,
        }
        # count standard plugins, subprocesses and gateways in the pipeline tree
        kwargs["atom_total"], kwargs["subprocess_total"], kwargs[
            "gateways_total"] = count_pipeline_tree_nodes(
                pipeline_instance.execution_data)
        if created:
            kwargs["task_instance_id"] = taskflow_instance.id
            TaskflowStatistics.objects.create(**kwargs)
        else:
            # BUGFIX: QuerySet.update() does not accept a `defaults` kwarg —
            # the old call tried to set a nonexistent "defaults" field and
            # raised (silently swallowed below, so updates never happened).
            # update_or_create() is the intended API for "update row keyed by
            # task_instance_id with these values".
            TaskflowStatistics.objects.update_or_create(
                task_instance_id=taskflow_instance.id, defaults=kwargs)
        return True
    except Exception as e:
        logger.exception((
            "task_flow_post_handler save TaskflowStatistics[instance_id={instance_id}] "
            "raise error: {error}").format(instance_id=task_instance_id,
                                           error=e))
        return False
def test_get_histories(self):
    """Recording a status three times yields three identical history entries."""
    def data_get(*args, **kwargs):
        data = Object()
        data.inputs = {'input': 'value'}
        data.outputs = {'outputs': 'value'}
        data.ex_data = 'ex_data'
        return data

    started = timezone.now()
    archived = timezone.now()
    status = MockStatus(skip=False)
    status.name = 'name'
    # no need microseconds — strip them so stored and compared values match
    status.started_time = started.replace(microsecond=0)
    status.archived_time = archived.replace(microsecond=0)

    with mock.patch(PIPELINE_DATA_GET, data_get):
        for _ in range(3):
            History.objects.record(status)
        history_list = History.objects.get_histories(status.id)
        self.assertEqual(len(history_list), 3)
        expected_elapsed = calculate_elapsed_time(status.started_time,
                                                  status.archived_time)
        expected_data = data_get()
        for history in history_list:
            self.assertEqual(history['started_time'], status.started_time)
            self.assertEqual(history['archived_time'], status.archived_time)
            self.assertEqual(history['elapsed_time'], expected_elapsed)
            self.assertEqual(history['inputs'], expected_data.inputs)
            self.assertEqual(history['outputs'], expected_data.outputs)
            self.assertEqual(history['ex_data'], expected_data.ex_data)
            self.assertEqual(history['loop'], status.loop)
            self.assertEqual(history['skip'], status.skip)
            self.assertTrue('history_id' in history)
def pipeline_archive_statistics_task(instance_id):
    """Refresh task-level timing statistics and rebuild per-node execution
    statistics after a pipeline instance archives.

    :param instance_id: PipelineInstance.instance_id of the archived pipeline
    :return: True on success, False when status fetch or node collection fails
    """
    task = TaskFlowInstance.objects.get(
        pipeline_instance__instance_id=instance_id)
    # refresh start_time / finish_time / elapsed_time on the statistics row
    statistic = TaskflowStatistics.objects.filter(
        task_instance_id=task.id).first()
    if statistic:
        begin = task.pipeline_instance.start_time
        end = task.pipeline_instance.finish_time
        statistic.start_time = begin
        statistic.finish_time = end
        statistic.elapsed_time = calculate_elapsed_time(begin, end)
        statistic.save()
    engine_ver = task.engine_ver
    # fetch the execution status tree for the task instance
    dispatcher = TaskCommandDispatcher(engine_ver, task.id,
                                       task.pipeline_instance,
                                       task.project.id)
    status_result = dispatcher.get_task_status()
    if not status_result["result"]:
        logger.exception(
            "get task_status_result fail, taskflow_instace = {id}.".format(
                id=task.id))
        return False
    # drop the previous per-node execution statistics before rebuilding
    TaskflowExecutedNodeStatistics.objects.filter(
        instance_id=task.pipeline_instance.id).delete()
    execution_data = task.pipeline_instance.execution_data
    try:
        components = recursive_collect_components_execution(
            activities=execution_data[PE.activities],
            status_tree=status_result["data"]["children"],
            task_instance=task,
            engine_ver=engine_ver,
        )
        TaskflowExecutedNodeStatistics.objects.bulk_create(components)
    except Exception:
        logger.exception((
            "pipeline_instance_handler save TaskflowExecuteNodeStatistics[instance_id={instance_id}] raise error"
        ).format(instance_id=instance_id))
        return False
    return True
def test_calculate_elapsed_time(self):
    """calculate_elapsed_time handles missing endpoints and all time scales."""
    # no endpoints / only archived -> 0
    self.assertEqual(calculate_elapsed_time(None, None), 0)
    self.assertEqual(
        calculate_elapsed_time(started_time=None,
                               archived_time=timezone.now()), 0)
    # started but not archived -> measured against "now", so non-zero
    self.assertNotEqual(
        calculate_elapsed_time(
            started_time=timezone.now() - datetime.timedelta(seconds=1),
            archived_time=None), 0)
    # seconds / minutes / hours / days
    cases = [
        (datetime.timedelta(seconds=59), 59),
        (datetime.timedelta(minutes=3), 3 * 60),
        (datetime.timedelta(hours=3), 3 * 60 * 60),
        (datetime.timedelta(days=3), 3 * 24 * 60 * 60),
    ]
    for delta, expected in cases:
        start = timezone.now()
        self.assertEqual(
            calculate_elapsed_time(started_time=start,
                                   archived_time=start + delta), expected)
def elapsed_time(self):
    """Seconds elapsed between this record's start_time and finish_time."""
    begin = self.start_time
    end = self.finish_time
    return calculate_elapsed_time(begin, end)
def recursive_collect_components_execution(activities, status_tree, task_instance, engine_ver=1, stack=None):
    """
    @summary Recursively walk the pipeline tree and collect a list of
             TaskflowExecutedNodeStatistics objects for every plugin node
             that has finished executing (success or failure).
    @param activities: activity (node) definitions of the current pipeline tree
    @param status_tree: execution status of the nodes in the current pipeline tree
    @param task_instance: root TaskFlowInstance
    @param stack: subprocess stack (None on the root call)
    @param engine_ver: pipeline engine version (1 -> pipeline_api, else bamboo_engine_api)
    """
    instance = task_instance.pipeline_instance
    task_instance_id = task_instance.id
    task_template = TaskTemplate.objects.get(
        pipeline_template=instance.template)
    # a None stack marks the root call; recursive calls always pass a stack,
    # so any non-None stack means we are inside a subprocess
    if stack is None:
        stack = []
        is_sub = False
    else:
        is_sub = True
    component_list = []
    for act_id, act in activities.items():
        if act_id in status_tree:
            exec_act = status_tree[act_id]
            # standard plugin node
            if act[PE.type] == PE.ServiceActivity:
                # archived states: finished / failed / revoked
                if exec_act["state"] in states.ARCHIVED_STATES:
                    component_code = act["component"]["code"]
                    component_version = act["component"].get(
                        "version", LEGACY_PLUGINS_VERSION)
                    is_remote = False
                    # remote plugins carry their real code/version inside the
                    # component data payload
                    if component_code == "remote_plugin":
                        component_code = act["component"]["data"][
                            "plugin_code"]["value"]
                        component_version = act["component"]["data"][
                            "plugin_version"]["value"]
                        is_remote = True
                    component_kwargs = {
                        "component_code": component_code,
                        "instance_id": instance.id,
                        "task_instance_id": task_instance_id,
                        "is_sub": is_sub,
                        "node_id": act_id,
                        "subprocess_stack": json.dumps(stack),
                        "started_time": format_date_time(exec_act["start_time"]),
                        "archived_time": format_date_time(exec_act["finish_time"]),
                        # prefer the engine-reported elapsed_time when present
                        "elapsed_time": exec_act.get(
                            "elapsed_time",
                            calculate_elapsed_time(
                                format_date_time(exec_act["start_time"]),
                                format_date_time(exec_act["finish_time"])),
                        ),
                        "is_skip": exec_act["skip"],
                        "is_retry": False,
                        "status": exec_act["state"] == "FINISHED",
                        "version": component_version,
                        "template_id": instance.template.id,
                        "task_template_id": task_template.id,
                        "project_id": task_template.project.id,
                        "instance_create_time": instance.create_time,
                        "instance_start_time": instance.start_time,
                        "instance_finish_time": instance.finish_time,
                        "is_remote": is_remote,
                    }
                    component_list.append(
                        TaskflowExecutedNodeStatistics(**component_kwargs))
                    if exec_act["retry"] > 0:
                        # the node was retried: pull earlier attempts from the
                        # execution history of the matching engine version
                        if engine_ver == 1:
                            history_list = pipeline_api.get_activity_histories(
                                act_id)
                        else:
                            history_list_result = bamboo_engine_api.get_node_short_histories(
                                runtime=BambooDjangoRuntime(), node_id=act_id)
                            history_list = history_list_result.data if history_list_result.result else []
                        for history in history_list:
                            # every history entry is recorded as a failed,
                            # non-skipped retry attempt (kwargs reused in place)
                            component_kwargs.update({
                                "started_time": history["started_time"],
                                "archived_time": history["archived_time"],
                                "elapsed_time": history.get(
                                    "elapsed_time",
                                    calculate_elapsed_time(
                                        history["started_time"],
                                        history["archived_time"]),
                                ),
                                "is_retry": True,
                                "is_skip": False,
                                "status": False,
                            })
                            component_list.append(
                                TaskflowExecutedNodeStatistics(
                                    **component_kwargs))
            # subprocess node: recurse into the subprocess's own activities
            elif act[PE.type] == PE.SubProcess:
                sub_activities = act[PE.pipeline][PE.activities]
                # copy the stack so sibling subprocesses do not share it
                copied_stack = deepcopy(stack)
                copied_stack.insert(0, act_id)
                component_list += recursive_collect_components_execution(
                    activities=sub_activities,
                    status_tree=exec_act["children"],
                    task_instance=task_instance,
                    stack=copied_stack,
                    engine_ver=engine_ver,
                )
    return component_list
def get_taskflow_node_detail(request):
    """Return execution detail (status, inputs/outputs, history, log, ex_data)
    for one node of a taskflow instance.

    Query params: task_id, node_id, subprocess_stack (JSON list of subprocess
    ids from root to the node's parent). Responds with a JsonResponse whose
    "result" is False when the task has not started or the node is unknown.
    """
    task_id = request.GET.get("task_id")
    node_id = request.GET.get("node_id")
    subprocess_stack = json.loads(request.GET.get("subprocess_stack", "[]"))
    # defaults used when the pipeline process object is gone
    # (NOTE(review): "destoryed" typo is preserved — it is a runtime string)
    data = {
        "execution_info": {},
        "inputs": "pipeline has been destoryed",
        "outputs": "pipeline has been destoryed",
        "history": {},
        "log": "",
        "ex_data": "",
    }
    taskflow = TaskFlowInstance.objects.get(id=task_id)
    if not taskflow.pipeline_instance.is_started:
        return JsonResponse({"result": False, "message": f"task[{task_id}] is not start"})
    if not taskflow.has_node(node_id):
        return JsonResponse({"result": False, "message": f"task[{task_id}] does not have node[{node_id}]"})
    status = Status.objects.get(id=node_id)
    # collect execution info
    data["execution_info"] = {
        "name": status.name,
        "start_time": status.started_time.strftime(SERIALIZE_DATE_FORMAT) if status.started_time else None,
        "archive_time": status.archived_time.strftime(SERIALIZE_DATE_FORMAT) if status.archived_time else None,
        "elapsed_time": calculate_elapsed_time(status.started_time, status.archived_time),
        "skip": status.skip,
        "error_ignorable": status.error_ignorable,
        "retry_times": status.retry,
        "id": status.id,
        "state": status.state,
        "loop": status.loop,
        "create_time": status.created_time,
        "version": status.version,
        # schedule fields default to "no schedule"; overwritten below if one exists
        "schedule_id": None,
        "is_scheduling": False,
        "schedule_times": 0,
        "wait_callback": False,
        "is_finished": False,
        "schedule_version": None,
        "callback_data": None,
    }
    try:
        schedule = ScheduleService.objects.schedule_for(status.id, status.version)
    except ScheduleService.DoesNotExist:
        # node has no schedule record; keep the defaults set above
        pass
    else:
        data["execution_info"].update(
            {
                "schedule_id": schedule.id,
                "is_scheduling": schedule.is_scheduling,
                "schedule_times": schedule.schedule_times,
                "wait_callback": schedule.wait_callback,
                "is_finished": schedule.is_finished,
                "schedule_version": schedule.version,
                "callback_data": schedule.callback_data,
            }
        )
    # collect inputs and outputs
    process = PipelineModel.objects.get(id=taskflow.pipeline_instance.instance_id).process
    # only process activity's inputs and outputs
    if process.root_pipeline:
        # walk down the subprocess stack to the pipeline containing the node
        target_pipeline = process.root_pipeline
        for sub_id in subprocess_stack:
            subprocess_act = [x for x in target_pipeline.spec.activities if x.id == sub_id][0]
            target_pipeline = subprocess_act.pipeline
        node = target_pipeline.spec.objects[node_id]
        if isinstance(node, Activity):
            data["inputs"] = hydrate_inputs(node.data.inputs)
            data["outputs"] = node.data.outputs
        elif isinstance(node, Gateway):
            data["inputs"] = data["outputs"] = "gateway object does not have data"
        elif isinstance(node, StartEvent):
            data["inputs"] = data["outputs"] = "start event object does not have data"
        elif isinstance(node, EndEvent):
            data["inputs"] = node.data.inputs
            data["outputs"] = node.data.outputs
    elif taskflow.pipeline_instance.is_finished or taskflow.pipeline_instance.is_revoked:
        data["inputs"] = data["outputs"] = "pipeline had finished or had been revoked"
    # collect history
    data["history"] = task_service.get_activity_histories(node_id)
    # collect log
    data["log"] = handle_plain_log(task_service.get_plain_log_for_node(node_id))
    # set ex_data
    data["ex_data"] = task_service.get_outputs(node_id)["ex_data"]
    return JsonResponse({"result": True, "data": data})
def migrate_instance(start, end):
    """
    @summary: migrate records from InstanceInPipeline to TaskflowStatistics
    param start: InstanceInPipeline primary key lower bound (inclusive)
    param end: InstanceInPipeline primary key upper bound (exclusive)
    return success: whether the migration completed
    """
    # select the target records; plain filter kwargs replace the old hack of
    # appending tuples to Q().children (identical query)
    instance_in_pipeline_records = InstanceInPipeline.objects.filter(
        id__gte=start, id__lt=end)
    # build the data-source dicts, skipping dirty records
    data_source_list = []
    for instance_in_pipeline in instance_in_pipeline_records:
        instance_id = instance_in_pipeline.instance_id
        try:
            pipeline_instance = PipelineInstance.objects.get(
                instance_id=instance_id)
            taskflow_instance = TaskFlowInstance.objects.get(
                pipeline_instance__id=pipeline_instance.id)
            pipeline_template = taskflow_instance.pipeline_instance.template
            task_template = TaskTemplate.objects.get(
                pipeline_template=pipeline_template)
            data_source_list.append({
                "pipeline_instance": pipeline_instance,
                "taskflow_instance": taskflow_instance,
                "pipeline_template": pipeline_template,
                "task_template": task_template,
                "instance_in_pipeline": instance_in_pipeline,
            })
        except Exception:
            # BUGFIX: the old message labeled this value "template_id" although
            # it is an instance_id
            logger.exception(
                f"[migrate_instance] dirty data error instance_id={instance_id}"
            )
            continue
    # build and persist the target rows
    for data_source_item in data_source_list:
        try:
            instance = data_source_item["pipeline_instance"]
            taskflow_instance = data_source_item["taskflow_instance"]
            task_template = data_source_item["task_template"]
            instance_in_pipeline = data_source_item["instance_in_pipeline"]
            pipeline_template = data_source_item["pipeline_template"]
            kwargs = dict(
                instance_id=instance.id,
                task_instance_id=taskflow_instance.id,
                atom_total=instance_in_pipeline.atom_total,
                subprocess_total=instance_in_pipeline.subprocess_total,
                gateways_total=instance_in_pipeline.gateways_total,
                project_id=taskflow_instance.project.id,
                category=task_template.category,
                template_id=pipeline_template.id,
                task_template_id=task_template.id,
                creator=instance.creator,
                create_time=instance.create_time,
                start_time=instance.start_time,
                finish_time=instance.finish_time,
                elapsed_time=calculate_elapsed_time(instance.start_time,
                                                    instance.finish_time),
                create_method=taskflow_instance.create_method,
            )
        except Exception:
            logger.exception(
                f"[migrate_instance] unknown error instance_id={instance.id}")
            continue
        try:
            # delete + create atomically so a failure rolls both back
            with transaction.atomic():
                TaskflowStatistics.objects.filter(
                    instance_id=kwargs["instance_id"]).delete()
                # BUGFIX: create() already persists the row; the old code
                # called save() again right after, issuing a redundant UPDATE
                TaskflowStatistics.objects.create(**kwargs)
        except Exception:
            instance_id = kwargs["instance_id"]
            logger.exception(
                f"[migrate_instance] instance_id={instance_id}的数据插入失败,自动回滚")
    return True