    def create_restart_job_workflow() -> Workflow:
        workflow = Workflow()
        for i in range(2):
            job = TestScheduler.create_periodic_job(i, 4)
            workflow.add_job(job)

        job = TestScheduler.create_periodic_job(2, 20)
        job.job_config.periodic_config = None
        workflow.add_job(job)
        deps = []
        deps.append(
            JobControlEdge(target_node_id='0_job',
                           source_node_id='2_job',
                           met_config=MetConfig(
                               event_key=generate_job_status_key('0_job'),
                               event_value=State.FINISHED.value,
                               action=TaskAction.RESTART)))
        deps.append(
            JobControlEdge(target_node_id='1_job',
                           source_node_id='2_job',
                           met_config=MetConfig(
                               event_key=generate_job_status_key('1_job'),
                               event_value=State.FINISHED.value,
                               action=TaskAction.RESTART)))
        workflow.add_edges("2_job", deps)
        workflow.workflow_id = 1
        return workflow
    def create_bash_workflow() -> Workflow:
        workflow = Workflow()
        workflow.project_desc = ProjectDesc()
        workflow.project_desc.project_name = "workflow_1"
        for i in range(3):
            job = TestDAGGenerator.create_bash_job(i)
            workflow.add_job(job)
        deps_1 = [
            JobControlEdge(target_node_id='',
                           source_node_id='1_job',
                           met_config=MetConfig(
                               event_key='key_1',
                               event_value='value_1',
                               event_type=UNDEFINED_EVENT_TYPE))
        ]
        deps_2 = [
            JobControlEdge(target_node_id='',
                           source_node_id='2_job',
                           met_config=MetConfig(
                               event_key='key_2',
                               event_value='value_2',
                               event_type=UNDEFINED_EVENT_TYPE))
        ]

        workflow.add_edges("1_job", deps_1)
        workflow.add_edges("2_job", deps_2)
        workflow.workflow_id = 1
        return workflow
    def register_workflow_execution(self, workflow: Workflow):
        exec_name = generate_time_str()
        workflow_meta: WorkflowExecutionMeta = self.get_client(
        ).register_workflow_execution(
            name=exec_name,
            project_id=int(
                workflow.project_desc.project_config.get_project_uuid()),
            execution_state=State.INIT,
            workflow_json=dumps(workflow))
        workflow.workflow_id = workflow_meta.uuid
        workflow.execution_name = exec_name

        # set workflow execution id to job context
        for job in workflow.jobs.values():
            job.job_context.workflow_execution_id = workflow_meta.uuid
    def submit_workflow(self,
                        workflow: Workflow,
                        project_desc: ProjectDesc,
                        args: Dict = None) -> WorkflowInfo:
        workflow_name = workflow.workflow_name
        dag_id = self.airflow_dag_id(project_desc.project_name,
                                     workflow.workflow_name)
        # Generate the Airflow DAG code for this workflow.
        code_text = self.dag_generator.generator(workflow, dag_id, args)
        workflow.workflow_name = workflow_name
        deploy_path = self.config.properties().get('airflow_deploy_path')
        if deploy_path is None:
            raise Exception("airflow_deploy_path config not set!")
        if not os.path.exists(deploy_path):
            os.makedirs(deploy_path)
        airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
        if os.path.exists(airflow_file_path):
            os.remove(airflow_file_path)
        # Write the generated DAG to a temporary file, then move it into the
        # Airflow deploy directory and trigger a DAG parse.
        with NamedTemporaryFile(mode='w+t',
                                prefix=dag_id,
                                suffix='.py',
                                dir='/tmp',
                                delete=False) as f:
            f.write(code_text)
        os.rename(f.name, airflow_file_path)
        self.airflow_client.trigger_parse_dag(airflow_file_path)
        return WorkflowInfo(namespace=project_desc.project_name,
                            workflow_name=workflow.workflow_name)
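
The deploy step above writes the generated DAG to a temporary file and then renames it into place, so Airflow never picks up a half-written file. Below is a standalone sketch of the same pattern (the helper name and paths are illustrative, not from the source); writing the temp file into the target directory, rather than /tmp, keeps the rename on a single filesystem:

import os
from tempfile import NamedTemporaryFile


def deploy_text_file(text: str, target_path: str) -> None:
    # Write to a temporary file first, then rename over the target so readers
    # never observe a partially written file (rename is atomic within one filesystem).
    target_dir = os.path.dirname(target_path)
    os.makedirs(target_dir, exist_ok=True)
    with NamedTemporaryFile(mode='w+t', dir=target_dir, delete=False) as f:
        f.write(text)
    os.rename(f.name, target_path)


# Illustrative usage:
deploy_text_file("# generated DAG code", "/tmp/airflow_dags/example_dag.py")
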
Example #5
def _upload_project_package(workflow: Workflow):
    """
    Uploads the project package of the given :class:`~ai_flow.workflow.workflow.Workflow` via the
    :class:`~ai_flow.plugin_interface.blob_manager_interface.BlobManager`.

    :param workflow: The generated :class:`~ai_flow.workflow.workflow.Workflow`.
    """
    blob_manager = BlobManagerFactory.get_blob_manager(current_project_config().get(WorkflowPropertyKeys.BLOB))
    uploaded_project_path = blob_manager.upload_project(str(workflow.workflow_snapshot_id),
                                                        current_project_context().project_path)
    workflow.project_uri = uploaded_project_path
    workflow.properties[WorkflowPropertyKeys.BLOB] = current_project_config().get(WorkflowPropertyKeys.BLOB)
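
After _upload_project_package runs, the workflow carries both the uploaded location (project_uri) and the blob configuration needed to re-create the same BlobManager elsewhere. A hedged sketch of the download side follows; download_project is an assumed counterpart of upload_project and its exact signature is not shown in this source:

# Hedged sketch: the download method name and signature are assumptions.
blob_config = workflow.properties[WorkflowPropertyKeys.BLOB]       # recorded by _upload_project_package
blob_manager = BlobManagerFactory.get_blob_manager(blob_config)    # same factory used for the upload
local_project_path = blob_manager.download_project(                # assumed counterpart of upload_project
    str(workflow.workflow_snapshot_id), workflow.project_uri)
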
Example #6
    def build_workflow(self, split_graph: SplitGraph,
                       project_context: ProjectContext) -> Workflow:
        workflow = Workflow()
        workflow.workflow_config = current_workflow_config()
        workflow.workflow_snapshot_id = '{}.{}.{}'.format(
            project_context.project_name, workflow.workflow_name,
            round(time.time() * 1000))
        # add ai_nodes to workflow
        for sub in split_graph.nodes.values():
            if sub.config.job_type not in self.job_generator_registry.object_dict:
                raise Exception("job generator does not support job_type {}".format(
                    sub.config.job_type))
            generator: JobGenerator = self.job_generator_registry \
                .get_object(sub.config.job_type)

            # set job resource dir
            job_resource_dir = os.path.join(
                project_context.get_generated_path(),
                workflow.workflow_snapshot_id, sub.config.job_name)
            if not os.path.exists(job_resource_dir):
                os.makedirs(job_resource_dir)

            job: Job = generator.generate(sub_graph=sub,
                                          resource_dir=job_resource_dir)
            job.resource_dir = job_resource_dir

            # set input output dataset
            for node in sub.nodes.values():
                if isinstance(node, ReadDatasetNode):
                    job.input_dataset_list.append(node.dataset())
                elif isinstance(node, WriteDatasetNode):
                    job.output_dataset_list.append(node.dataset())

            workflow.add_job(job)

        def validate_edge(head, tail):
            if head not in workflow.jobs:
                raise Exception(
                    'job: {} is not defined in workflow!'.format(head))
            if tail is not None and tail != '' and tail != '*' and tail not in workflow.jobs:
                raise Exception(
                    'job: {} is not defined in workflow!'.format(tail))

        # add edges to workflow
        for edges in split_graph.edges.values():
            for e in edges:
                control_edge = copy.deepcopy(e)
                validate_edge(control_edge.destination, control_edge.source)
                workflow.add_edge(control_edge.destination, control_edge)
        return workflow
    def create_longrun_workflow() -> Workflow:
        workflow = Workflow()
        job_0 = TestScheduler.create_job(0, 10000)
        workflow.add_job(job_0)
        job_1 = TestScheduler.create_job(1, 1)
        workflow.add_job(job_1)
        deps = []
        deps.append(
            JobControlEdge(target_node_id='1_job',
                           source_node_id='0_job',
                           met_config=MetConfig(event_key='key_1',
                                                event_value='value_1',
                                                event_type='stop',
                                                action=TaskAction.STOP)))

        workflow.add_edges("0_job", deps)
        workflow.workflow_id = 1
        return workflow
Example #8
def _apply_full_info_to_workflow(workflow: Workflow, entry_module_path: Text):
    """
    Applies the full information to the specified :class:`~ai_flow.workflow.workflow.Workflow` with the given entry
    module path: it sets the workflow config and the entry module path, uploads the project package of the workflow,
    and sets the registered job plugins.

    :param workflow: The generated :class:`~ai_flow.workflow.workflow.Workflow`.
    :param entry_module_path: The entry module path of the workflow.
    """
    workflow.workflow_config = current_workflow_config()
    _set_entry_module_path(workflow, entry_module_path)
    _upload_project_package(workflow)
    _set_job_plugins(workflow)
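
A hedged sketch of a call site for the helper above; how the entry module path is obtained and the surrounding submission flow are assumptions, not from the source:

import sys

# Hypothetical call site: apply the full info just before handing the workflow to the scheduler.
entry_module_path = sys.modules['__main__'].__file__   # assumed way to locate the user's entry module
_apply_full_info_to_workflow(workflow, entry_module_path)
# After this call the workflow holds its workflow_config, the entry module path,
# the uploaded project_uri and the registered job plugins (per the docstring above).
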
Example #9
    def generator(self,
                  workflow: Workflow,
                  dag_id=None,
                  default_args=None) -> Text:
        if default_args is None:
            default_args = DAGTemplate.DEFAULT_ARGS_VALUE
        self.op_count = -1
        if dag_id is None:
            dag_id = workflow.project_desc.project_name
        code_text = DAGTemplate.AIRFLOW_IMPORT
        workflow.workflow_name = dag_id

        op_set = set()
        for name, job in workflow.jobs.items():
            generator: AirflowCodeGenerator = get_airflow_code_manager(
            ).get_generator(job.platform, job.exec_engine)
            if generator not in op_set:
                code_text += generator.generate_operator_code()
                op_set.add(generator)

        # code_text += DAGTemplate.SET_CONFIG.format(workflow.project_desc.project_path + '/project.yaml')
        code_text += DAGTemplate.DEFAULT_ARGS.format(default_args)
        code_text += DAGTemplate.DAG_DEFINE.format(dag_id)

        task_map = {}
        for name, job in workflow.jobs.items():
            task_id, op_name, code = self.generate_op_code(job)
            task_map[task_id] = op_name
            code_text += code

        for instance_id, edges in workflow.edges.items():
            op_name = task_map[instance_id]
            configs = []
            for edge in edges:
                met_config: MetConfig = edge.met_config
                if match_stop_before_config(met_config):
                    dep_task_id = edge.target_node_id
                    code = self.generate_upstream(op_name,
                                                  task_map[dep_task_id])
                    code_text += code
                else:
                    code = self.generate_event_deps(op_name, met_config)
                    code_text += code
                    configs.append(met_config)
            if len(configs) > 0:
                code = self.generate_handler(op_name, configs)
                code_text += code

        return code_text
    def test_workflow_serde(self):
        workflow_config_file = os.path.join(os.path.dirname(__file__), 'workflow_1.yaml')
        workflow_config = load_workflow_config(workflow_config_file)
        workflow = Workflow()
        workflow.workflow_config = workflow_config
        jobs = []
        for job_config in workflow_config.job_configs.values():
            job = Job(job_config=job_config)
            workflow.add_job(job)
            jobs.append(job)
        edge = ControlEdge(destination=jobs[0].job_name,
                           scheduling_rule=SchedulingRule(MeetAnyEventCondition().add_event('a', 'a'),
                                                          JobAction.START))
        workflow.add_edge(jobs[0].job_name, edge)
        edge = ControlEdge(destination=jobs[0].job_name,
                           scheduling_rule=SchedulingRule(MeetAnyEventCondition().add_event('b', 'b'),
                                                          JobAction.START))
        workflow.add_edge(jobs[0].job_name, edge)
        # Round-trip the workflow through JSON and check that jobs and edges survive.
        json_text = json_utils.dumps(workflow)
        w: Workflow = json_utils.loads(json_text)
        self.assertEqual(3, len(w.jobs))
        self.assertEqual(2, len(w.edges.get(jobs[0].job_name)))
    def build_workflow(self, split_graph: SplitGraph,
                       project_desc: ProjectDesc) -> Workflow:
        sub_id_to_job_id = {}
        workflow = Workflow()
        workflow.project_desc = project_desc
        # add ai_nodes to workflow
        for sub in split_graph.nodes.values():
            if (sub.config.platform, sub.config.engine
                ) not in self.job_generator_registry.object_dict:
                raise Exception(
                    "job generator does not support platform {} engine {}".format(
                        sub.config.platform, sub.config.engine))
            generator: BaseJobGenerator = self.job_generator_registry \
                .get_object((sub.config.platform, sub.config.engine))
            job: BaseJob = generator.generate(sub_graph=sub,
                                              project_desc=project_desc)
            job.job_config.project_desc = project_desc
            if job.job_name is None:
                job.job_name = job.job_config.job_name
            workflow.add_job(job)
            sub_id_to_job_id[sub.instance_id] = job.instance_id
        # add edges to workflow
        for edges in split_graph.edges.values():
            for e in edges:
                control_edge = copy.deepcopy(e)
                control_edge.source_node_id = sub_id_to_job_id[
                    e.source_node_id]
                control_edge.target_node_id = sub_id_to_job_id[
                    e.target_node_id]
                job_edge: JobControlEdge = control_edge_to_job_edge(
                    control_edge=control_edge)
                workflow.add_edge(sub_id_to_job_id[e.source_node_id], job_edge)

        self.register_workflow_execution(workflow)

        for job in workflow.jobs.values():
            job.job_config.project_path = project_desc.project_path
            if job.job_config.exec_mode is None:
                job.job_config.exec_mode = ExecutionMode.BATCH
            generator: BaseJobGenerator = self.job_generator_registry \
                .get_object((job.platform, job.exec_engine))
            generator.generate_job_resource(job)
        return workflow
    def create_workflow() -> Workflow:
        workflow = Workflow()
        for i in range(3):
            job = TestScheduler.create_job(i, 1)
            workflow.add_job(job)
        deps = []
        deps.append(
            JobControlEdge(target_node_id='0_job',
                           source_node_id='2_job',
                           met_config=MetConfig(
                               event_key=generate_job_status_key('0_job'),
                               event_value=State.FINISHED.value)))
        deps.append(
            JobControlEdge(target_node_id='1_job',
                           source_node_id='2_job',
                           met_config=MetConfig(
                               event_key=generate_job_status_key('1_job'),
                               event_value=State.FINISHED.value)))
        workflow.add_edges("2_job", deps)
        workflow.workflow_id = 1
        return workflow
    def create_workflow() -> Workflow:
        workflow = Workflow()
        workflow.project_desc = ProjectDesc()
        workflow.project_desc.project_name = "workflow_1"
        for i in range(6):
            job = TestDAGGenerator.create_dummy_job(i)
            if i == 2:
                job.job_config = SendEventJobConfig('localhost:50051', 'key_1', 'value_1', UNDEFINED_EVENT_TYPE)
            elif i == 3:
                job.job_config = SendEventJobConfig('localhost:50051', 'key_2', 'value_2', UNDEFINED_EVENT_TYPE)
            elif i == 5:
                job.job_config = SendEventJobConfig('localhost:50051', 'key_2', 'value_2', "STOP_SCHEDULER_CMD")
            workflow.add_job(job)
        deps = []
        deps.append(JobControlEdge(target_node_id='0_job', source_node_id='2_job',
                                   met_config=MetConfig(event_key=generate_job_status_key('0_job'),
                                                        event_value=State.FINISHED.value)))
        deps.append(JobControlEdge(target_node_id='1_job', source_node_id='2_job',
                                   met_config=MetConfig(event_key=generate_job_status_key('1_job'),
                                                        event_value=State.FINISHED.value)))
        workflow.add_edges("2_job", deps)

        deps = []
        deps.append(JobControlEdge(target_node_id='2_job', source_node_id='4_job',
                                   met_config=MetConfig(event_key='key_1',
                                                        event_value='value_1',
                                                        event_type=UNDEFINED_EVENT_TYPE)))
        deps.append(JobControlEdge(target_node_id='3_job', source_node_id='4_job',
                                   met_config=MetConfig(event_key='key_2',
                                                        event_value='value_2',
                                                        event_type=UNDEFINED_EVENT_TYPE)))
        workflow.add_edges("4_job", deps)

        deps = []
        deps.append(JobControlEdge(target_node_id='4_job', source_node_id='5_job',
                                   met_config=MetConfig(event_key=generate_job_status_key('5_job'),
                                                        event_value=State.FINISHED.value)))
        workflow.add_edges("5_job", deps)
        workflow.workflow_id = 1
        return workflow
    def generator(self,
                  workflow: Workflow,
                  dag_id=None,
                  default_args=None) -> Text:
        if default_args is None:
            default_args = DAGTemplate.DEFAULT_ARGS_VALUE
        self.op_count = -1
        if dag_id is None:
            dag_id = workflow.project_desc.project_name
        code_text = DAGTemplate.AIRFLOW_IMPORT
        workflow.workflow_name = dag_id

        op_set = set()
        for name, job in workflow.jobs.items():
            generator: AirflowCodeGenerator = get_airflow_code_manager(
            ).get_generator(job.platform, job.exec_engine)
            if generator not in op_set:
                code_text += generator.generate_operator_code()
                op_set.add(generator)

        # code_text += DAGTemplate.SET_CONFIG.format(workflow.project_desc.project_path + '/project.yaml')
        code_text += DAGTemplate.DEFAULT_ARGS.format(default_args)
        code_text += DAGTemplate.DAG_DEFINE.format(dag_id)

        task_map = {}
        for name, job in workflow.jobs.items():
            task_id, op_name, code = self.generate_op_code(job)
            task_map[task_id] = op_name
            code_text += code
            # add periodic
            if job.job_config.periodic_config is not None:
                periodic_config: PeriodicConfig = job.job_config.periodic_config
                if 'interval' == periodic_config.periodic_type:
                    code_text += DAGTemplate.PERIODIC_CONFIG.format(
                        self.op_count, str({'interval': periodic_config.args}))
                elif 'cron' == periodic_config.periodic_type:
                    code_text += DAGTemplate.PERIODIC_CONFIG.format(
                        self.op_count, str({'cron': periodic_config.args}))
                else:
                    raise Exception(
                        'periodic_config does not support type {}; only interval and cron are supported.'
                        .format(periodic_config.periodic_type))

        for instance_id, edges in workflow.edges.items():
            op_name = task_map[instance_id]
            configs = []
            for edge in edges:
                met_config: MetConfig = edge.met_config
                if match_stop_before_config(met_config):
                    dep_task_id = edge.target_node_id
                    code = self.generate_upstream(op_name,
                                                  task_map[dep_task_id])
                    code_text += code
                else:
                    code = self.generate_event_deps(op_name, met_config)
                    code_text += code
                    configs.append(met_config)
            if len(configs) > 0:
                code = self.generate_handler(op_name, configs)
                code_text += code

        return code_text
    def create_workflow_one_job() -> Workflow:
        workflow = Workflow()
        workflow.workflow_id = 1
        job = TestScheduler.create_job(0, 1)
        workflow.add_job(job)
        return workflow