def run_task_function(client: NotificationClient):
    with af.global_config_file(workflow_config_file()):
        with af.config(task_config):
            input_example = af.read_example(
                example_info=example_1,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))
            processed = af.transform(
                input_data_list=[input_example],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))
            af.write_example(
                input_data=processed,
                example_info=example_2,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Sink()))
        workflow_info = af.workflow_operation.submit_workflow(
            workflow_name)
        af.workflow_operation.start_new_workflow_execution(workflow_name)
        while True:
            with create_session() as session:
                dag_run = session.query(DagRun).filter(
                    DagRun.dag_id == 'test_project.test_workflow').first()
                if dag_run is not None and dag_run.state in State.finished:
                    break
                else:
                    time.sleep(1)
def run_flink_job():
    with af.global_config_file(test_util.get_job_config_file()):
        with af.config('vvp_job'):
            faf.vvp_job()
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    print(res)
def run_flink_python_job():
    with af.global_config_file(test_util.get_job_config_file()):
        with af.config('vvp_python_job'):
            faf.vvp_job()
    workflow_id = af.run(test_util.get_project_path(),
                         dag_id='wordcount_vvp_python',
                         scheduler_type=af.SchedulerType.AIRFLOW)
def run_task_function(client: NotificationClient):
    with af.global_config_file(workflow_config_file()):
        with af.config('task_1'):
            cmd_executor = af.user_define_operation(
                output_num=0,
                executor=CmdExecutor(
                    cmd_line='echo "hello world" && sleep 30'))
        workflow_info = af.workflow_operation.submit_workflow(
            workflow_name)
        we = af.workflow_operation.start_new_workflow_execution(
            workflow_name)
        while True:
            with create_session() as session:
                ti = session.query(TaskInstance) \
                    .filter(TaskInstance.dag_id ==
                            'test_project.test_workflow').first()
                if ti is not None and ti.state == State.RUNNING:
                    af.workflow_operation.stop_job('task_1', we.execution_id)
                elif ti is not None and ti.state == State.KILLED:
                    break
                else:
                    time.sleep(1)
        job_info = af.workflow_operation.get_job('task_1', we.execution_id)
        self.assertEqual('task_1', job_info.job_name)
        self.assertEqual(
            project_name, job_info.workflow_execution.workflow_info.namespace)
        self.assertEqual(
            workflow_name,
            job_info.workflow_execution.workflow_info.workflow_name)
        job_info_list = af.workflow_operation.list_jobs(we.execution_id)
        self.assertEqual(1, len(job_info_list))
def build_workflow():
    with af.global_config_file(project_path + '/resources/workflow_config.yaml'):
        with af.config('job_1'):
            op_1 = af.user_define_operation(
                af.PythonObjectExecutor(StreamPrintHelloExecutor('job_1')))
        with af.config('job_2'):
            op_2 = af.user_define_operation(
                af.PythonObjectExecutor(
                    SendEventExecutor(key='key_1', value='value_1')))
        with af.config('job_3'):
            op_3 = af.user_define_operation(
                af.PythonObjectExecutor(
                    SendEventExecutor(key='key_2', value='value_2')))
        af.user_define_control_dependency(op_1, op_2,
                                          event_key='key_1',
                                          event_type='UNDEFINED',
                                          event_value="value_1")
        af.user_define_control_dependency(op_1, op_3,
                                          event_key='key_2',
                                          event_type='UNDEFINED',
                                          event_value="value_2")
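# StreamPrintHelloExecutor and SendEventExecutor above are defined elsewhere
# in the test module. A minimal sketch of what SendEventExecutor could look
# like, assuming af.PythonObjectExecutor only requires the wrapped object to
# expose an execute(context, args)-style hook, and reusing the
# NotificationClient/BaseEvent usage visible in the tests below; the hook
# name, server URI, and constructor signature are assumptions.
import time

from notification_service.base_notification import BaseEvent
from notification_service.client import NotificationClient


class SendEventExecutor:
    def __init__(self, key, value, num=1, post_time=0):
        self.key = key
        self.value = value
        self.num = num              # how many events to send
        self.post_time = post_time  # seconds to wait before each send

    def execute(self, context, args):
        client = NotificationClient(server_uri='localhost:50051')
        for _ in range(self.num):
            time.sleep(self.post_time)
            # Sending the event satisfies the control dependency declared
            # via af.user_define_control_dependency above.
            client.send_event(BaseEvent(key=self.key, value=self.value))
        return []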
def run_task_function(client: NotificationClient):
    with af.global_config_file(workflow_config_file()):
        with af.config('task_2'):
            executor_1 = af.user_define_operation(
                af.PythonObjectExecutor(
                    SendExecutor(sender='task_2',
                                 key='key_1',
                                 value='value_1',
                                 event_type='UNDEFINED',
                                 port=server_port())))
        with af.config('task_5'):
            executor_2 = af.user_define_operation(
                af.PythonObjectExecutor(SimpleExecutor()))
        af.user_define_control_dependency(src=executor_2,
                                          dependency=executor_1,
                                          event_key='key_1',
                                          event_value='value_1')
        workflow_info = af.workflow_operation.submit_workflow(
            workflow_name)
        af.workflow_operation.start_new_workflow_execution(workflow_name)
        while True:
            with create_session() as session:
                dag_run = session.query(DagRun).filter(
                    DagRun.dag_id == 'test_project.test_workflow').first()
                if dag_run is not None and dag_run.state in State.finished:
                    break
                else:
                    time.sleep(1)
def run_task_function(client: NotificationClient):
    with af.global_config_file(workflow_config_file()):
        with af.config('task_2'):
            executor_1 = af.user_define_operation(
                af.PythonObjectExecutor(SimpleExecutor()))
        with af.config('task_5'):
            executor_2 = af.user_define_operation(
                af.PythonObjectExecutor(SimpleExecutor()))
        af.user_define_control_dependency(src=executor_2,
                                          dependency=executor_1,
                                          namespace='test',
                                          event_key='key_1',
                                          event_value='value_1',
                                          sender='*')
        workflow_info = af.workflow_operation.submit_workflow(
            workflow_name)
        af.workflow_operation.start_new_workflow_execution(workflow_name)
        flag = True
        while True:
            with create_session() as session:
                tes = session.query(TaskExecution).filter(
                    TaskExecution.dag_id == 'test_project.test_workflow',
                    TaskExecution.task_id == 'task_2').all()
                if 1 == len(tes) and flag:
                    client.send_event(BaseEvent(key='key_1', value='value_1'))
                    flag = False
                dag_run = session.query(DagRun).filter(
                    DagRun.dag_id == 'test_project.test_workflow').first()
                if dag_run is not None and dag_run.state in State.finished:
                    break
                else:
                    time.sleep(1)
def build_workflow():
    with af.global_config_file(project_path + '/resources/workflow_config.yaml'):
        with af.config('job_1'):
            af.user_define_operation(
                af.PythonObjectExecutor(StreamPrintEventExecutor('job_1')))
        with af.config('job_2'):
            af.user_define_operation(
                af.PythonObjectExecutor(
                    SendEventExecutor(key='key_1', value='value_1',
                                      num=5, post_time=5)))
def run_flink_spec_job():
    with af.global_config_file(test_util.get_job_config_file()):
        with af.config('vvp_spec_job'):
            faf.vvp_job()
    workflow_id = af.run(test_util.get_project_path(),
                         dag_id='wordcount_vvp_python',
                         scheduler_type=af.SchedulerType.AIRFLOW)
    res = af.wait_workflow_execution_finished(workflow_id)
    print(res)
def build_and_submit_ai_flow():
    with af.global_config_file(workflow_config_file()):
        with af.config('task_1'):
            cmd_executor = af.user_define_operation(
                output_num=0,
                executor=CmdExecutor(
                    cmd_line='echo "hello world"'))
        workflow_info = af.workflow_operation.submit_workflow(
            'test_workflow')
    return workflow_info.workflow_name
def build_workflow(workflow_config_path):
    with ai_flow.global_config_file(workflow_config_path):
        with ai_flow.config('job_1'):
            op_1 = ai_flow.user_define_operation(
                ai_flow.PythonObjectExecutor(PrintHelloExecutor('job_1')))
        with ai_flow.config('job_2'):
            op_2 = ai_flow.user_define_operation(
                ai_flow.PythonObjectExecutor(PrintHelloExecutor('job_2')))
        ai_flow.stop_before_control_dependency(op_2, op_1)
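# PrintHelloExecutor is likewise defined elsewhere in the tests. A minimal
# sketch under the same assumption that PythonObjectExecutor wraps any object
# exposing an execute(context, args) hook (the hook name is an assumption):
class PrintHelloExecutor:
    def __init__(self, job_name):
        self.job_name = job_name

    def execute(self, context, args):
        # stop_before_control_dependency(op_2, op_1) above means job_2 is
        # scheduled only after job_1 has finished.
        print('hello world, {}'.format(self.job_name))
        return []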
def run_task_function(client: NotificationClient): with af.global_config_file(workflow_config_file()): with af.config('task_1'): cmd_executor = af.user_define_operation( output_num=0, executor=CmdExecutor( cmd_line='echo "hello world" && sleep 30'.format( 1))) workflow_info = af.workflow_operation.submit_workflow( workflow_name) self.assertFalse(is_paused()) af.workflow_operation.pause_workflow_scheduling(workflow_name) self.assertTrue(is_paused()) af.workflow_operation.resume_workflow_scheduling(workflow_name) self.assertFalse(is_paused())
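# The is_paused() helper asserted on above is not shown. A plausible sketch
# that reads Airflow's DagModel table directly; the dag_id is taken from the
# neighbouring tests, and the helper itself is an illustration, not the
# project's actual implementation:
from airflow.models import DagModel


def is_paused():
    with create_session() as session:
        dag_model = session.query(DagModel).filter(
            DagModel.dag_id == 'test_project.test_workflow').first()
        return dag_model is not None and dag_model.is_paused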
def test_context_with_yaml_file(self):
    config_file = path_util.get_file_dir(__file__) + "/workflow.yaml"
    with af.global_config_file(config_path=config_file) as g_config:
        with af.config('task_1') as config_1:
            self.assertEqual('task_1', config_1.job_name)
            self.assertEqual('cmd_line', config_1.engine)
            self.assertEqual('interval',
                             config_1.periodic_config.periodic_type)
            self.assertEqual(20, config_1.periodic_config.args['seconds'])
        with af.config('task_2') as config_2:
            self.assertEqual('task_2', config_2.job_name)
            self.assertEqual('cmd_line', config_2.engine)
            self.assertEqual('cron', config_2.periodic_config.periodic_type)
            self.assertEqual('* * * * *', config_2.periodic_config.args)
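# The workflow.yaml read by the test above is not shown. Mirroring the
# generate-then-load pattern of test_context_with_file at the end of this
# section, a plausible generator for it; the YAML schema is an assumption
# inferred from the asserted values (engine 'cmd_line', an interval job of
# 20 seconds, and a cron job with expression '* * * * *'):
def generate_workflow_yaml(config_file):
    content = (
        "task_1:\n"
        "  engine: cmd_line\n"
        "  periodic_config:\n"
        "    interval:\n"
        "      seconds: 20\n"
        "task_2:\n"
        "  engine: cmd_line\n"
        "  periodic_config:\n"
        "    cron: '* * * * *'\n"
    )
    with open(config_file, 'w') as f:
        f.write(content)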
def build_workflow():
    with af.global_config_file(project_path + '/resources/workflow_config.yaml'):
        with af.config('job_1'):
            op_1 = af.user_define_operation(
                af.PythonObjectExecutor(PrintHelloExecutor('job_1')))
        with af.config('job_2'):
            op_2 = af.user_define_operation(
                af.PythonObjectExecutor(PrintHelloExecutor('job_2')))
        with af.config('job_3'):
            op_3 = af.user_define_operation(
                af.PythonObjectExecutor(PrintHelloExecutor('job_3')))
        af.stop_before_control_dependency(op_3, op_1)
        af.stop_before_control_dependency(op_3, op_2)
def run_task_function(client: NotificationClient):
    with af.global_config_file(workflow_config_file()):
        with af.config('task_2'):
            executor = af.user_define_operation(
                af.PythonObjectExecutor(SimpleExecutor()))
        workflow_info = af.workflow_operation.submit_workflow(
            workflow_name)
        af.workflow_operation.start_new_workflow_execution(workflow_name)
        while True:
            with create_session() as session:
                dag_run = session.query(DagRun).filter(
                    DagRun.dag_id == 'test_project.test_workflow').first()
                if dag_run is not None and dag_run.state in State.finished:
                    break
                else:
                    time.sleep(1)
def run_task_function(client: NotificationClient): with af.global_config_file(workflow_config_file()): with af.config('task_1'): cmd_executor = af.user_define_operation( output_num=0, executor=CmdExecutor( cmd_line='echo "hello world"'.format(1))) workflow_info = af.workflow_operation.submit_workflow( workflow_name) af.workflow_operation.start_new_workflow_execution(workflow_name) while True: with create_session() as session: dag_run = session.query(DagRun)\ .filter(DagRun.dag_id == 'test_project.{}'.format(workflow_name)).first() if dag_run is not None and dag_run.state == State.SUCCESS: break else: time.sleep(1)
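# The same poll-until-finished loop over DagRun recurs in several of the
# run_task_function variants above. A small helper that factors it out
# (the helper name is illustrative, not part of the project's API):
def wait_for_dag_run(dag_id, finished_states, poll_interval=1):
    """Block until the DagRun for dag_id reaches one of finished_states."""
    while True:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.dag_id == dag_id).first()
            if dag_run is not None and dag_run.state in finished_states:
                return dag_run
        time.sleep(poll_interval)


# Equivalent to the loop above:
# wait_for_dag_run('test_project.{}'.format(workflow_name), [State.SUCCESS])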
def run_airflow_dag_function(self):
    # Wait until the scheduler has finished parsing the DAG file.
    from datetime import datetime
    ns_client = NotificationClient(server_uri='localhost:50051')
    with af.global_config_file(test_util.get_workflow_config_file()):
        with af.config('task_1'):
            cmd_executor = af.user_define_operation(
                output_num=0,
                executor=CmdExecutor(cmd_line=['echo "hello world!"']))
    af.deploy_to_airflow(test_util.get_project_path(),
                         dag_id='test_dag_111',
                         default_args={
                             'schedule_interval': None,
                             'start_date': datetime(2025, 12, 1),
                         })
    context = af.run(project_path=test_util.get_project_path(),
                     dag_id='test_dag_111',
                     scheduler_type=SchedulerType.AIRFLOW)
    print(context.dagrun_id)
    time.sleep(5)
    ns_client.send_event(StopSchedulerEvent(job_id=0).to_event())
def run_task_function(client: NotificationClient): with af.global_config_file(workflow_config_file()): with af.config('task_1'): cmd_executor = af.user_define_operation( output_num=0, executor=CmdExecutor( cmd_line='echo "hello world"'.format(1))) workflow_info = af.workflow_operation.submit_workflow( workflow_name) workflow_info = af.workflow_operation.get_workflow(workflow_name) self.assertEqual(workflow_name, workflow_info.workflow_name) self.assertEqual(project_name, workflow_info.namespace) workflow_info_list = af.workflow_operation.list_workflows() self.assertEqual(1, len(workflow_info_list)) workflow_info = af.workflow_operation.delete_workflow( workflow_name) with self.assertRaises(Exception) as context: workflow_info = af.workflow_operation.get_workflow( workflow_name)
def build_workflow():
    with af.global_config_file(project_path + '/resources/workflow_config.yaml'):
        with af.config('job_1'):
            op_1 = af.user_define_operation(
                af.PythonObjectExecutor(PrintHelloExecutor('job_1')))
        with af.config('job_2'):
            op_2 = af.user_define_operation(
                af.PythonObjectExecutor(
                    SendEventExecutor(key='key_1', value='value_1',
                                      num=3, post_time=20)))
        af.user_define_control_dependency(op_1, op_2,
                                          event_key='key_1',
                                          event_type='UNDEFINED',
                                          event_value="value_1",
                                          action=TaskAction.RESTART)
def run_task_function(client: NotificationClient):
    with af.global_config_file(workflow_config_file()):
        with af.config('task_2'):
            executor_1 = af.user_define_operation(
                af.PythonObjectExecutor(SimpleExecutor()))
        with af.config('task_3'):
            executor_2 = af.user_define_operation(
                af.PythonObjectExecutor(SimpleExecutor()))
        af.model_version_control_dependency(
            src=executor_2,
            dependency=executor_1,
            model_name='model_1',
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED)
        workflow_info = af.workflow_operation.submit_workflow(
            workflow_name)
        af.workflow_operation.start_new_workflow_execution(workflow_name)
        r_flag = True
        while True:
            with create_session() as session:
                tes2 = session.query(TaskExecution).filter(
                    TaskExecution.dag_id == 'test_project.test_workflow',
                    TaskExecution.task_id == 'task_2').all()
                if len(tes2) == 1 and r_flag:
                    af.register_model_version(
                        model='model_1',
                        model_path='/tmp/model/v1',
                        current_stage=af.ModelVersionStage.GENERATED)
                    r_flag = False
                dag_run = session.query(DagRun).filter(
                    DagRun.dag_id == 'test_project.test_workflow').first()
                if dag_run is not None and dag_run.state in State.finished:
                    break
                else:
                    time.sleep(1)
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        # The train job is periodic: it runs every `interval` seconds,
        # as configured in workflow_config.yaml.
        with af.config('train_job'):
            # Register the raw training data (example) metadata and read it,
            # i.e. the training dataset.
            train_example = af.register_example(
                name=artifact_prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_BATCH,
                batch_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(
                example_info=train_example,
                executor=PythonObjectExecutor(python_object=ExampleReader()))
            # Transform (preprocess) the example.
            train_transform = af.transform(
                input_data_list=[train_read_example],
                executor=PythonObjectExecutor(python_object=ExampleTransformer()))
            # Register the model metadata and train the model.
            train_model = af.register_model(
                model_name=artifact_prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            train_channel = af.train(
                input_data_list=[train_transform],
                executor=PythonObjectExecutor(python_object=ModelTrainer()),
                model_info=train_model)

        with af.config('validate_job'):
            # Model validation: read the validation dataset and validate the
            # model before it is used for prediction.
            validate_example = af.register_example(
                name=artifact_prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                batch_uri=EXAMPLE_URI.format('evaluate'))
            validate_read_example = af.read_example(
                example_info=validate_example,
                executor=PythonObjectExecutor(
                    python_object=ValidateExampleReader()))
            validate_transform = af.transform(
                input_data_list=[validate_read_example],
                executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(
                name=validate_artifact_name,
                batch_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(
                input_data_list=[validate_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelValidator(validate_artifact_name)))

        with af.config('push_job'):
            # Push the model to serving:
            # register the metadata of the pushed model.
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(
                name=push_model_artifact_name,
                batch_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            # Prediction (inference).
            predict_example = af.register_example(
                name=artifact_prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(
                example_info=predict_example,
                executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(
                input_data_list=[predict_read_example],
                executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(
                input_data_list=[predict_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(python_object=ModelPredictor()))
            # Save the prediction result.
            write_example = af.register_example(
                name=artifact_prefix + 'write_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predict_channel,
                example_info=write_example,
                executor=PythonObjectExecutor(python_object=ExampleWriter()))

        # Define the relation graph connected by control edges: once a round
        # of training finishes, the validator is launched, and the pusher is
        # launched if the new model is better. Prediction starts once the
        # first round of training is done; whenever the pusher deploys a new
        # model, the predictor switches to the latest deployed model.
        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=validate_trigger,
            model_name=train_model.name)
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=push_trigger,
            model_name=train_model.name)

    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        with af.config('train_job'):
            train_example = af.register_example(
                name=artifact_prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(
                example_info=train_example,
                executor=PythonObjectExecutor(python_object=TrainExampleReader()))
            train_transform = af.transform(
                input_data_list=[train_read_example],
                executor=PythonObjectExecutor(
                    python_object=TrainExampleTransformer()))
            train_model = af.register_model(
                model_name=artifact_prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            train_channel = af.train(
                input_data_list=[train_transform],
                executor=PythonObjectExecutor(python_object=ModelTrainer()),
                model_info=train_model)

        with af.config('validate_job'):
            validate_example = af.register_example(
                name=artifact_prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('evaluate'),
                data_format='npz')
            validate_read_example = af.read_example(
                example_info=validate_example,
                executor=PythonObjectExecutor(
                    python_object=ValidateExampleReader()))
            validate_transform = af.transform(
                input_data_list=[validate_read_example],
                executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(
                name=validate_artifact_name,
                stream_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(
                input_data_list=[validate_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelValidator(validate_artifact_name)))

        with af.config('push_job'):
            # Push the model to serving:
            # register the metadata of the pushed model.
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(
                name=push_model_artifact_name,
                stream_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            predict_example = af.register_example(
                name=artifact_prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(
                example_info=predict_example,
                executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(
                input_data_list=[predict_read_example],
                executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(
                input_data_list=[predict_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(python_object=ModelPredictor()))
            write_example = af.register_example(
                name=artifact_prefix + 'export_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predict_channel,
                example_info=write_example,
                executor=PythonObjectExecutor(python_object=ExampleWriter()))

        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=validate_trigger,
            model_name=train_model.name)
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=push_trigger,
            model_name=train_model.name)

    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
def test_context_with_file(self):
    config_file = path_util.get_file_dir(__file__) + "/workflow_config.json"

    def generate_workflow_config():
        workflow_config = WorkFlowConfig()
        workflow_config.add_job_config(
            config_key="global_config_key",
            job_config=af.BaseJobConfig(
                platform="local",
                engine="python",
                properties={"common_key": "common_value"}))
        workflow_config.add_job_config(
            config_key="test_job",
            job_config=af.BaseJobConfig(
                platform=None,
                engine=None,
                properties={"job_key": "job_value"}))
        workflow_config.add_job_config(
            config_key="test_job_1",
            job_config=af.BaseJobConfig(
                platform='kubernetes',
                engine='flink',
                properties={"job_key_1": "job_value_1"}))
        with open(config_file, 'w') as f:
            f.write(json_utils.dumps(workflow_config))

    generate_workflow_config()
    with af.global_config_file(config_path=config_file):
        with af.config(config="test_job") as cc:
            cc.properties['aa'] = 'aa'
            af.user_define_operation(executor=None)
        node_list = list(_default_ai_graph.nodes.values())
        self.assertEqual(
            'python', node_list[len(node_list) - 1].properties[ENGINE_NAME])
        self.assertEqual(
            'common_value',
            node_list[len(node_list) - 1].config.properties["common_key"])
        self.assertEqual(
            'job_value',
            node_list[len(node_list) - 1].config.properties["job_key"])
        self.assertEqual(
            'aa', node_list[len(node_list) - 1].config.properties["aa"])
        self.assertEqual('python', node_list[len(node_list) - 1].config.engine)
        self.assertEqual('local', node_list[len(node_list) - 1].config.platform)
        with af.config(config="test_job_1"):
            af.user_define_operation(executor=None)
        node_list = list(_default_ai_graph.nodes.values())
        self.assertEqual(
            'flink', node_list[len(node_list) - 1].properties[ENGINE_NAME])
        self.assertEqual(
            'common_value',
            node_list[len(node_list) - 1].config.properties["common_key"])
        self.assertEqual(
            'job_value_1',
            node_list[len(node_list) - 1].config.properties["job_key_1"])
        self.assertEqual('flink', node_list[len(node_list) - 1].config.engine)
        self.assertEqual('kubernetes',
                         node_list[len(node_list) - 1].config.platform)