def create_restart_job_workflow() -> Workflow:
    workflow = Workflow()
    for i in range(2):
        job = TestScheduler.create_periodic_job(i, 4)
        workflow.add_job(job)
    job = TestScheduler.create_periodic_job(2, 20)
    job.job_config.periodic_config = None
    workflow.add_job(job)
    deps = []
    deps.append(
        JobControlEdge(target_node_id='0_job',
                       source_node_id='2_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('0_job'),
                           event_value=State.FINISHED.value,
                           action=TaskAction.RESTART)))
    deps.append(
        JobControlEdge(target_node_id='1_job',
                       source_node_id='2_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('1_job'),
                           event_value=State.FINISHED.value,
                           action=TaskAction.RESTART)))
    workflow.add_edges("2_job", deps)
    workflow.workflow_id = 1
    return workflow
def create_bash_workflow() -> Workflow:
    workflow = Workflow()
    workflow.project_desc = ProjectDesc()
    workflow.project_desc.project_name = "workflow_1"
    for i in range(3):
        job = TestDAGGenerator.create_bash_job(i)
        workflow.add_job(job)
    deps_1 = [
        JobControlEdge(target_node_id='',
                       source_node_id='1_job',
                       met_config=MetConfig(event_key='key_1',
                                            event_value='value_1',
                                            event_type=UNDEFINED_EVENT_TYPE))
    ]
    deps_2 = [
        JobControlEdge(target_node_id='',
                       source_node_id='2_job',
                       met_config=MetConfig(event_key='key_2',
                                            event_value='value_2',
                                            event_type=UNDEFINED_EVENT_TYPE))
    ]
    workflow.add_edges("1_job", deps_1)
    workflow.add_edges("2_job", deps_2)
    workflow.workflow_id = 1
    return workflow
def register_workflow_execution(self, workflow: Workflow):
    exec_name = generate_time_str()
    workflow_meta: WorkflowExecutionMeta = self.get_client() \
        .register_workflow_execution(
            name=exec_name,
            project_id=int(
                workflow.project_desc.project_config.get_project_uuid()),
            execution_state=State.INIT,
            workflow_json=dumps(workflow))
    workflow.workflow_id = workflow_meta.uuid
    workflow.execution_name = exec_name
    # set workflow execution id to job context
    for job in workflow.jobs.values():
        job.job_context.workflow_execution_id = workflow_meta.uuid
def submit_workflow(self, workflow: Workflow, project_desc: ProjectDesc,
                    args: Dict = None) -> WorkflowInfo:
    workflow_name = workflow.workflow_name
    dag_id = self.airflow_dag_id(project_desc.project_name,
                                 workflow.workflow_name)
    code_text = self.dag_generator.generator(workflow, dag_id, args)
    # generator() overwrites workflow.workflow_name with the dag_id; restore it.
    workflow.workflow_name = workflow_name
    deploy_path = self.config.properties().get('airflow_deploy_path')
    if deploy_path is None:
        raise Exception("airflow_deploy_path config not set!")
    if not os.path.exists(deploy_path):
        os.makedirs(deploy_path)
    airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
    if os.path.exists(airflow_file_path):
        os.remove(airflow_file_path)
    # Write the generated DAG to a temporary file first, then rename it into
    # place, so the Airflow scheduler never parses a partially written file.
    with NamedTemporaryFile(mode='w+t', prefix=dag_id, suffix='.py',
                            dir='/tmp', delete=False) as f:
        f.write(code_text)
    os.rename(f.name, airflow_file_path)
    self.airflow_client.trigger_parse_dag(airflow_file_path)
    return WorkflowInfo(namespace=project_desc.project_name,
                        workflow_name=workflow.workflow_name)
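# A note on the temp-file-plus-rename step in submit_workflow above: this is the
# usual atomic-publish pattern, with the caveat that os.rename only succeeds
# (and is only atomic) when source and target sit on the same filesystem, which
# matters here since the temp file lives in /tmp. A minimal self-contained
# sketch of the pattern; atomic_write is a hypothetical helper, not part of the
# scheduler code above:
import os
from tempfile import NamedTemporaryFile


def atomic_write(text, target_path):
    # Create the temp file in the target directory so the final rename
    # stays on one filesystem and remains atomic.
    target_dir = os.path.dirname(target_path) or '.'
    with NamedTemporaryFile(mode='w+t', dir=target_dir, delete=False) as f:
        f.write(text)
        tmp_name = f.name
    os.rename(tmp_name, target_path)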
def _upload_project_package(workflow: Workflow):
    """
    Uploads the project package of the given :class:`~ai_flow.workflow.workflow.Workflow`
    by the :class:`~ai_flow.plugin_interface.blob_manager_interface.BlobManager`.

    :param workflow: The generated :class:`~ai_flow.workflow.workflow.Workflow`.
    """
    blob_manager = BlobManagerFactory.get_blob_manager(
        current_project_config().get(WorkflowPropertyKeys.BLOB))
    uploaded_project_path = blob_manager.upload_project(
        str(workflow.workflow_snapshot_id),
        current_project_context().project_path)
    workflow.project_uri = uploaded_project_path
    workflow.properties[WorkflowPropertyKeys.BLOB] = \
        current_project_config().get(WorkflowPropertyKeys.BLOB)
def build_workflow(self, split_graph: SplitGraph,
                   project_context: ProjectContext) -> Workflow:
    workflow = Workflow()
    workflow.workflow_config = current_workflow_config()
    workflow.workflow_snapshot_id = '{}.{}.{}'.format(
        project_context.project_name, workflow.workflow_name,
        round(time.time() * 1000))
    # add ai_nodes to workflow
    for sub in split_graph.nodes.values():
        if sub.config.job_type not in self.job_generator_registry.object_dict:
            raise Exception("job generator does not support job_type {}".format(
                sub.config.job_type))
        generator: JobGenerator = self.job_generator_registry \
            .get_object(sub.config.job_type)

        # set job resource dir
        job_resource_dir = os.path.join(
            project_context.get_generated_path(),
            workflow.workflow_snapshot_id, sub.config.job_name)
        if not os.path.exists(job_resource_dir):
            os.makedirs(job_resource_dir)

        job: Job = generator.generate(sub_graph=sub,
                                      resource_dir=job_resource_dir)
        job.resource_dir = job_resource_dir

        # set input output dataset
        for node in sub.nodes.values():
            if isinstance(node, ReadDatasetNode):
                job.input_dataset_list.append(node.dataset())
            elif isinstance(node, WriteDatasetNode):
                job.output_dataset_list.append(node.dataset())

        workflow.add_job(job)

    def validate_edge(head, tail):
        if head not in workflow.jobs:
            raise Exception(
                'job: {} is not defined in workflow!'.format(head))
        if tail is not None and tail != '' and tail != '*' \
                and tail not in workflow.jobs:
            raise Exception(
                'job: {} is not defined in workflow!'.format(tail))

    # add edges to workflow
    for edges in split_graph.edges.values():
        for e in edges:
            control_edge = copy.deepcopy(e)
            validate_edge(control_edge.destination, control_edge.source)
            workflow.add_edge(control_edge.destination, control_edge)
    return workflow
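# build_workflow above dispatches on job_type through a registry of JobGenerator
# objects. A minimal sketch of that lookup pattern, with Registry as an
# illustrative stand-in for ai_flow's registry class rather than its actual API:
class Registry:
    def __init__(self):
        # Mirrors the object_dict membership test used in build_workflow.
        self.object_dict = {}

    def register(self, job_type, generator):
        self.object_dict[job_type] = generator

    def get_object(self, job_type):
        if job_type not in self.object_dict:
            raise Exception(
                "job generator does not support job_type {}".format(job_type))
        return self.object_dict[job_type]


registry = Registry()
registry.register('bash', lambda sub_graph: 'generated bash job')
assert registry.get_object('bash')(None) == 'generated bash job'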
def create_longrun_workflow() -> Workflow:
    workflow = Workflow()
    job_0 = TestScheduler.create_job(0, 10000)
    workflow.add_job(job_0)
    job_1 = TestScheduler.create_job(1, 1)
    workflow.add_job(job_1)
    deps = []
    deps.append(
        JobControlEdge(target_node_id='1_job',
                       source_node_id='0_job',
                       met_config=MetConfig(event_key='key_1',
                                            event_value='value_1',
                                            event_type='stop',
                                            action=TaskAction.STOP)))
    workflow.add_edges("0_job", deps)
    workflow.workflow_id = 1
    return workflow
def _apply_full_info_to_workflow(workflow: Workflow, entry_module_path: Text):
    """
    Applies the full information to the specified :class:`~ai_flow.workflow.workflow.Workflow`
    with the given entry module path. Applying the full workflow information sets
    the entry module path, uploads the project package of the workflow and sets
    the registered job plugins.

    :param workflow: The generated :class:`~ai_flow.workflow.workflow.Workflow`.
    :param entry_module_path: The entry module path of the workflow.
    """
    workflow.workflow_config = current_workflow_config()
    _set_entry_module_path(workflow, entry_module_path)
    _upload_project_package(workflow)
    _set_job_plugins(workflow)
def generator(self, workflow: Workflow, dag_id=None, default_args=None) -> Text:
    if default_args is None:
        default_args = DAGTemplate.DEFAULT_ARGS_VALUE
    self.op_count = -1
    if dag_id is None:
        dag_id = workflow.project_desc.project_name
    code_text = DAGTemplate.AIRFLOW_IMPORT
    workflow.workflow_name = dag_id
    # Emit each generator's operator definition code only once.
    op_set = set()
    for name, job in workflow.jobs.items():
        generator: AirflowCodeGenerator = get_airflow_code_manager() \
            .get_generator(job.platform, job.exec_engine)
        if generator not in op_set:
            code_text += generator.generate_operator_code()
            op_set.add(generator)
    # code_text += DAGTemplate.SET_CONFIG.format(workflow.project_desc.project_path + '/project.yaml')
    code_text += DAGTemplate.DEFAULT_ARGS.format(default_args)
    code_text += DAGTemplate.DAG_DEFINE.format(dag_id)
    task_map = {}
    for name, job in workflow.jobs.items():
        task_id, op_name, code = self.generate_op_code(job)
        task_map[task_id] = op_name
        code_text += code
    for instance_id, edges in workflow.edges.items():
        op_name = task_map[instance_id]
        configs = []
        for edge in edges:
            met_config: MetConfig = edge.met_config
            if match_stop_before_config(met_config):
                # Stop-before dependencies become plain upstream relations;
                # event-based edges get an event handler generated below.
                dep_task_id = edge.target_node_id
                code = self.generate_upstream(op_name, task_map[dep_task_id])
                code_text += code
            else:
                code = self.generate_event_deps(op_name, met_config)
                code_text += code
                configs.append(met_config)
        if len(configs) > 0:
            code = self.generate_handler(op_name, configs)
            code_text += code
    return code_text
def test_workflow_serde(self):
    workflow_config_file = os.path.join(os.path.dirname(__file__),
                                        'workflow_1.yaml')
    workflow_config = load_workflow_config(workflow_config_file)
    workflow = Workflow()
    workflow.workflow_config = workflow_config
    jobs = []
    for job_config in workflow_config.job_configs.values():
        job = Job(job_config=job_config)
        workflow.add_job(job)
        jobs.append(job)
    edge = ControlEdge(destination=jobs[0].job_name,
                       scheduling_rule=SchedulingRule(
                           MeetAnyEventCondition().add_event('a', 'a'),
                           JobAction.START))
    workflow.add_edge(jobs[0].job_name, edge)
    edge = ControlEdge(destination=jobs[0].job_name,
                       scheduling_rule=SchedulingRule(
                           MeetAnyEventCondition().add_event('b', 'b'),
                           JobAction.START))
    workflow.add_edge(jobs[0].job_name, edge)
    json_text = json_utils.dumps(workflow)
    w: Workflow = json_utils.loads(json_text)
    self.assertEqual(3, len(w.jobs))
    self.assertEqual(2, len(w.edges.get(jobs[0].job_name)))
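# test_workflow_serde above checks round-trip invariants (job count, edge count)
# rather than comparing raw JSON text, which keeps the test stable across field
# ordering. The same idea with the standard json module on a plain dict, purely
# as illustration (ai_flow's json_utils serializes arbitrary objects; json here
# does not):
import json

payload = {'jobs': ['a_job', 'b_job', 'c_job'], 'edges': {'a_job': ['e1', 'e2']}}
restored = json.loads(json.dumps(payload))
assert len(restored['jobs']) == 3
assert len(restored['edges']['a_job']) == 2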
def build_workflow(self, split_graph: SplitGraph,
                   project_desc: ProjectDesc) -> Workflow:
    sub_id_to_job_id = {}
    workflow = Workflow()
    workflow.project_desc = project_desc
    # add ai_nodes to workflow
    for sub in split_graph.nodes.values():
        if (sub.config.platform, sub.config.engine) \
                not in self.job_generator_registry.object_dict:
            raise Exception(
                "job generator does not support platform {} engine {}".format(
                    sub.config.platform, sub.config.engine))
        generator: BaseJobGenerator = self.job_generator_registry \
            .get_object((sub.config.platform, sub.config.engine))
        job: BaseJob = generator.generate(sub_graph=sub,
                                          project_desc=project_desc)
        job.job_config.project_desc = project_desc
        if job.job_name is None:
            job.job_name = job.job_config.job_name
        workflow.add_job(job)
        sub_id_to_job_id[sub.instance_id] = job.instance_id
    # add edges to workflow
    for edges in split_graph.edges.values():
        for e in edges:
            control_edge = copy.deepcopy(e)
            control_edge.source_node_id = sub_id_to_job_id[e.source_node_id]
            control_edge.target_node_id = sub_id_to_job_id[e.target_node_id]
            job_edge: JobControlEdge = control_edge_to_job_edge(
                control_edge=control_edge)
            workflow.add_edge(sub_id_to_job_id[e.source_node_id], job_edge)
    self.register_workflow_execution(workflow)
    for job in workflow.jobs.values():
        job.job_config.project_path = project_desc.project_path
        if job.job_config.exec_mode is None:
            job.job_config.exec_mode = ExecutionMode.BATCH
        generator: BaseJobGenerator = self.job_generator_registry \
            .get_object((job.platform, job.exec_engine))
        generator.generate_job_resource(job)
    return workflow
def create_workflow() -> Workflow:
    workflow = Workflow()
    for i in range(3):
        job = TestScheduler.create_job(i, 1)
        workflow.add_job(job)
    deps = []
    deps.append(
        JobControlEdge(target_node_id='0_job',
                       source_node_id='2_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('0_job'),
                           event_value=State.FINISHED.value)))
    deps.append(
        JobControlEdge(target_node_id='1_job',
                       source_node_id='2_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('1_job'),
                           event_value=State.FINISHED.value)))
    workflow.add_edges("2_job", deps)
    workflow.workflow_id = 1
    return workflow
def create_workflow() -> Workflow:
    workflow = Workflow()
    workflow.project_desc = ProjectDesc()
    workflow.project_desc.project_name = "workflow_1"
    for i in range(6):
        job = TestDAGGenerator.create_dummy_job(i)
        if i == 2:
            job.job_config = SendEventJobConfig('localhost:50051', 'key_1',
                                                'value_1', UNDEFINED_EVENT_TYPE)
        elif i == 3:
            job.job_config = SendEventJobConfig('localhost:50051', 'key_2',
                                                'value_2', UNDEFINED_EVENT_TYPE)
        elif i == 5:
            job.job_config = SendEventJobConfig('localhost:50051', 'key_2',
                                                'value_2', "STOP_SCHEDULER_CMD")
        workflow.add_job(job)
    deps = []
    deps.append(JobControlEdge(target_node_id='0_job',
                               source_node_id='2_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('0_job'),
                                   event_value=State.FINISHED.value)))
    deps.append(JobControlEdge(target_node_id='1_job',
                               source_node_id='2_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('1_job'),
                                   event_value=State.FINISHED.value)))
    workflow.add_edges("2_job", deps)
    deps = []
    deps.append(JobControlEdge(target_node_id='2_job',
                               source_node_id='4_job',
                               met_config=MetConfig(
                                   event_key='key_1',
                                   event_value='value_1',
                                   event_type=UNDEFINED_EVENT_TYPE)))
    deps.append(JobControlEdge(target_node_id='3_job',
                               source_node_id='4_job',
                               met_config=MetConfig(
                                   event_key='key_2',
                                   event_value='value_2',
                                   event_type=UNDEFINED_EVENT_TYPE)))
    workflow.add_edges("4_job", deps)
    deps = []
    deps.append(JobControlEdge(target_node_id='4_job',
                               source_node_id='5_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('5_job'),
                                   event_value=State.FINISHED.value)))
    workflow.add_edges("5_job", deps)
    workflow.workflow_id = 1
    return workflow
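# The MetConfig edges above fire when an observed event matches the configured
# key/value pair. A simplified sketch of that matching rule; Event and Met below
# are illustrative stand-ins, not the ai_flow classes:
from collections import namedtuple

Event = namedtuple('Event', ['key', 'value'])
Met = namedtuple('Met', ['event_key', 'event_value'])


def is_met(event, config):
    return event.key == config.event_key and event.value == config.event_value


assert is_met(Event('key_1', 'value_1'), Met('key_1', 'value_1'))
assert not is_met(Event('key_1', 'other'), Met('key_1', 'value_1'))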
def generator(self, workflow: Workflow, dag_id=None, default_args=None) -> Text:
    if default_args is None:
        default_args = DAGTemplate.DEFAULT_ARGS_VALUE
    self.op_count = -1
    if dag_id is None:
        dag_id = workflow.project_desc.project_name
    code_text = DAGTemplate.AIRFLOW_IMPORT
    workflow.workflow_name = dag_id
    op_set = set()
    for name, job in workflow.jobs.items():
        generator: AirflowCodeGenerator = get_airflow_code_manager() \
            .get_generator(job.platform, job.exec_engine)
        if generator not in op_set:
            code_text += generator.generate_operator_code()
            op_set.add(generator)
    # code_text += DAGTemplate.SET_CONFIG.format(workflow.project_desc.project_path + '/project.yaml')
    code_text += DAGTemplate.DEFAULT_ARGS.format(default_args)
    code_text += DAGTemplate.DAG_DEFINE.format(dag_id)
    task_map = {}
    for name, job in workflow.jobs.items():
        task_id, op_name, code = self.generate_op_code(job)
        task_map[task_id] = op_name
        code_text += code
        # add periodic
        if job.job_config.periodic_config is not None:
            periodic_config: PeriodicConfig = job.job_config.periodic_config
            if 'interval' == periodic_config.periodic_type:
                code_text += DAGTemplate.PERIODIC_CONFIG.format(
                    self.op_count, str({'interval': periodic_config.args}))
            elif 'cron' == periodic_config.periodic_type:
                code_text += DAGTemplate.PERIODIC_CONFIG.format(
                    self.op_count, str({'cron': periodic_config.args}))
            else:
                raise Exception(
                    'periodic_config does not support {} type; only interval '
                    'and cron are supported.'.format(
                        periodic_config.periodic_type))
    for instance_id, edges in workflow.edges.items():
        op_name = task_map[instance_id]
        configs = []
        for edge in edges:
            met_config: MetConfig = edge.met_config
            if match_stop_before_config(met_config):
                dep_task_id = edge.target_node_id
                code = self.generate_upstream(op_name, task_map[dep_task_id])
                code_text += code
            else:
                code = self.generate_event_deps(op_name, met_config)
                code_text += code
                configs.append(met_config)
        if len(configs) > 0:
            code = self.generate_handler(op_name, configs)
            code_text += code
    return code_text
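# The periodic branch in the generator above serializes either
# {'interval': args} or {'cron': args} into the generated DAG code. A hedged,
# standalone sketch of that dispatch (periodic_to_dict is a hypothetical
# helper, not part of the generator):
def periodic_to_dict(periodic_type, args):
    if periodic_type == 'interval':
        return {'interval': args}
    elif periodic_type == 'cron':
        return {'cron': args}
    raise Exception(
        'periodic_config does not support {} type; only interval '
        'and cron are supported.'.format(periodic_type))


assert periodic_to_dict('cron', '0 * * * *') == {'cron': '0 * * * *'}
assert periodic_to_dict('interval', {'seconds': 30}) == {'interval': {'seconds': 30}}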
def create_workflow_one_job() -> Workflow:
    workflow = Workflow()
    workflow.workflow_id = 1
    job = TestScheduler.create_job(0, 1)
    workflow.add_job(job)
    return workflow