def run_project(project_root_path):
    af.set_project_config_file(project_root_path + '/project.yaml')

    # Configure the command line job: platform is local and engine is cmd_line.
    cmd_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=CMDEngine().engine())
    with af.config(cmd_job_config):
        # Command line job executor
        cmd_job = af.user_define_operation(executor=CmdExecutor(cmd_line="echo Start AI flow"))

    # Configure the python job: platform is local and engine is python.
    python_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=PythonEngine.engine())
    # Set the execution mode of this python job to BATCH,
    # i.e. jobs with this config run as batch jobs.
    python_job_config.exec_mode = af.ExecutionMode.BATCH

    with af.config(python_job_config):
        # Path of the source data (under the '..../simple_transform_airflow' dir)
        source_path = os.path.dirname(os.path.abspath(__file__)) + '/source_data.csv'
        # Path of the sink data
        sink_path = os.path.dirname(os.path.abspath(__file__)) + '/sink_data.csv'

        # To keep the project portable, register the example in the metadata service.
        read_example_meta = af.register_example(name='read_example',
                                                support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                data_format='csv',
                                                data_type='pandas',
                                                batch_uri=source_path)

        # Read the training example using af.read_example();
        # example_info is the meta information of the example.
        read_example_channel = af.read_example(example_info=read_example_meta,
                                               exec_args=ExecuteArgs(
                                                   batch_properties=Args(header=None, names=["a", "b", "c"])))

        # Transform examples using af.transform()
        transform_channel = af.transform(input_data_list=[read_example_channel],
                                         executor=PythonObjectExecutor(python_object=SimpleTransform()))

        write_example_meta = af.register_example(name='write_example',
                                                 support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                 data_format='csv',
                                                 data_type='pandas',
                                                 batch_uri=sink_path)

        # Write the transformed example to the sink path.
        write = af.write_example(input_data=transform_channel,
                                 example_info=write_example_meta,
                                 exec_args=ExecuteArgs(batch_properties=Args(sep=',', header=False, index=False)))

    # Add a control dependency: the read_example job starts right after the command line job finishes.
    af.stop_before_control_dependency(read_example_channel, cmd_job)

    transform_dag = 'simple_transform'
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)
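# --- Illustrative only ---------------------------------------------------------------
# SimpleTransform above is a user-defined object wrapped by PythonObjectExecutor but not
# shown in this snippet. A minimal sketch of what it could look like, assuming the AI Flow
# Python engine's user-defined-executor pattern (a class with an
# execute(function_context, input_list) hook). The base class and the fillna(0) transform
# are assumptions, not the project's actual implementation.
class SimpleTransform(object):  # the real class would subclass AI Flow's python Executor base class
    def execute(self, function_context, input_list):
        # input_list[0] is expected to be the pandas DataFrame produced by read_example;
        # the concrete transformation here is only illustrative.
        return [input_list[0].fillna(0)]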
def run_workflow():
    build_workflow()
    af.set_project_config_file(project_path + '/project.yaml')

    # Deploy a workflow that is scheduled every 10 minutes.
    default_args = '{\'schedule_interval\': %s, \'start_date\': \'%s\'}' % ('\'*/10 * * * *\'', datetime.utcnow())
    af.deploy_to_airflow(project_path, dag_id='airflow_dag_example', default_args=default_args)

    # Force-trigger one run.
    context = af.run(project_path=project_path,
                     dag_id='airflow_dag_example',
                     scheduler_type=SchedulerType.AIRFLOW)
    print(context.dagrun_id)
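# --- Illustrative only ---------------------------------------------------------------
# The manually escaped default_args string above can be hard to read. A hypothetical helper
# (not part of the original example) that builds the same kind of value, e.g.
# "{'schedule_interval': '*/10 * * * *', 'start_date': '2021-06-01 12:00:00.000000'}":
def make_default_args(cron_expr):
    from datetime import datetime
    return "{{'schedule_interval': '{}', 'start_date': '{}'}}".format(cron_expr, datetime.utcnow())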
def run_airflow_dag_function(self):
    # Wait for the DAG file to be parsed.
    from datetime import datetime
    ns_client = NotificationClient(server_uri='localhost:50051')
    with af.global_config_file(test_util.get_workflow_config_file()):
        with af.config('task_1'):
            cmd_executor = af.user_define_operation(
                output_num=0,
                executor=CmdExecutor(cmd_line=['echo "hello world!"']))
    af.deploy_to_airflow(test_util.get_project_path(),
                         dag_id='test_dag_111',
                         default_args={
                             'schedule_interval': None,
                             'start_date': datetime(2025, 12, 1),
                         })
    context = af.run(project_path=test_util.get_project_path(),
                     dag_id='test_dag_111',
                     scheduler_type=SchedulerType.AIRFLOW)
    print(context.dagrun_id)
    time.sleep(5)
    ns_client.send_event(StopSchedulerEvent(job_id=0).to_event())
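# --- Illustrative only ---------------------------------------------------------------
# How the driver functions above are typically invoked. This entry point is hypothetical:
# the project layout is assumed, and run_airflow_dag_function is a test method that would
# normally be executed by its test class rather than called directly.
if __name__ == '__main__':
    import os
    project_root = os.path.dirname(os.path.abspath(__file__))  # assumed: script lives in the project root
    run_project(project_root)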
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        with af.config('train_job'):
            train_example = af.register_example(name=artifact_prefix + 'train_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(example_info=train_example,
                                                 executor=PythonObjectExecutor(python_object=TrainExampleReader()))
            train_transform = af.transform(input_data_list=[train_read_example],
                                           executor=PythonObjectExecutor(python_object=TrainExampleTransformer()))
            train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression',
                                            model_type=ModelType.SAVED_MODEL,
                                            model_desc='logistic regression model')
            train_channel = af.train(input_data_list=[train_transform],
                                     executor=PythonObjectExecutor(python_object=ModelTrainer()),
                                     model_info=train_model)

        with af.config('validate_job'):
            validate_example = af.register_example(name=artifact_prefix + 'validate_example',
                                                   support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                   stream_uri=EXAMPLE_URI.format('evaluate'),
                                                   data_format='npz')
            validate_read_example = af.read_example(example_info=validate_example,
                                                    executor=PythonObjectExecutor(python_object=ValidateExampleReader()))
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(name=validate_artifact_name,
                                                     stream_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(input_data_list=[validate_transform],
                                                 model_info=train_model,
                                                 executor=PythonObjectExecutor(
                                                     python_object=ModelValidator(validate_artifact_name)))

        with af.config('push_job'):
            # Push the model to serving.
            # Register metadata of the pushed model.
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(name=push_model_artifact_name,
                                                       stream_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(model_info=train_model,
                                         executor=PythonObjectExecutor(
                                             python_object=ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            predict_example = af.register_example(name=artifact_prefix + 'predict_example',
                                                  support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                  stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(example_info=predict_example,
                                                   executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(input_data_list=[predict_transform],
                                         model_info=train_model,
                                         executor=PythonObjectExecutor(python_object=ModelPredictor()))
            write_example = af.register_example(name=artifact_prefix + 'export_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(input_data=predict_channel,
                             example_info=write_example,
                             executor=PythonObjectExecutor(python_object=ExampleWriter()))

        af.model_version_control_dependency(src=validate_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
                                            dependency=validate_trigger,
                                            model_name=train_model.name)
        af.model_version_control_dependency(src=push_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
                                            dependency=push_trigger,
                                            model_name=train_model.name)

    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        # The train job is configured as a periodic job: it re-runs every
        # `interval` seconds, as defined in workflow_config.yaml.
        with af.config('train_job'):
            # Register metadata for the raw training data (example) and read it (i.e. the training dataset).
            train_example = af.register_example(name=artifact_prefix + 'train_example',
                                                support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                batch_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(example_info=train_example,
                                                 executor=PythonObjectExecutor(python_object=ExampleReader()))
            # Transform (preprocess) the example.
            train_transform = af.transform(input_data_list=[train_read_example],
                                           executor=PythonObjectExecutor(python_object=ExampleTransformer()))
            # Register the model metadata and train the model.
            train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression',
                                            model_type=ModelType.SAVED_MODEL,
                                            model_desc='logistic regression model')
            train_channel = af.train(input_data_list=[train_transform],
                                     executor=PythonObjectExecutor(python_object=ModelTrainer()),
                                     model_info=train_model)

        with af.config('validate_job'):
            # Model validation:
            # read the validation dataset and validate the model before it is used for prediction.
            validate_example = af.register_example(name=artifact_prefix + 'validate_example',
                                                   support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                   batch_uri=EXAMPLE_URI.format('evaluate'))
            validate_read_example = af.read_example(example_info=validate_example,
                                                    executor=PythonObjectExecutor(python_object=ValidateExampleReader()))
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(name=validate_artifact_name,
                                                     batch_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(input_data_list=[validate_transform],
                                                 model_info=train_model,
                                                 executor=PythonObjectExecutor(
                                                     python_object=ModelValidator(validate_artifact_name)))

        with af.config('push_job'):
            # Push the model to serving.
            # Register metadata of the pushed model.
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(name=push_model_artifact_name,
                                                       batch_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(model_info=train_model,
                                         executor=PythonObjectExecutor(
                                             python_object=ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            # Prediction (inference).
            predict_example = af.register_example(name=artifact_prefix + 'predict_example',
                                                  support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                  stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(example_info=predict_example,
                                                   executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(input_data_list=[predict_transform],
                                         model_info=train_model,
                                         executor=PythonObjectExecutor(python_object=ModelPredictor()))
            # Save the prediction result.
            write_example = af.register_example(name=artifact_prefix + 'write_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(input_data=predict_channel,
                             example_info=write_example,
                             executor=PythonObjectExecutor(python_object=ExampleWriter()))

        # Define the relation graph connected by control edges:
        # once a round of training is done, the validator is launched, and
        # the pusher is launched if the new model is better.
        # Prediction starts once the first round of training is done, and
        # whenever the pusher deploys a new model, the predictor switches to the latest deployed model.
        af.model_version_control_dependency(src=validate_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
                                            dependency=validate_trigger,
                                            model_name=train_model.name)
        af.model_version_control_dependency(src=push_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
                                            dependency=push_trigger,
                                            model_name=train_model.name)

    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
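# --- Illustrative only ---------------------------------------------------------------
# In both run_project variants above, ModelValidator is constructed with the artifact name
# registered via af.register_artifact, so the executor can look that artifact up at run time
# and record validation results under its URI. A minimal sketch of that pattern; the base
# class and the method body are assumptions, not the project's actual implementation.
class ModelValidator(object):  # the real class would subclass AI Flow's python Executor base class
    def __init__(self, artifact_name):
        self.artifact_name = artifact_name  # name registered with af.register_artifact(...)

    def execute(self, function_context, input_list):
        # A real validator would score the candidate model on input_list and write the result
        # under the artifact's URI; this body only marks where that logic would live.
        return []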