Esempio n. 1
0
def run_project(project_root_path):
    """Build the simple-transform workflow and run it with the Airflow scheduler.

    Two jobs are defined: a command line job that echoes a start message, and
    a local, batch-mode Python job that reads ``source_data.csv``, applies
    ``SimpleTransform`` and writes ``sink_data.csv``. Both CSV paths are
    resolved next to this file. The workflow is then deployed to Airflow under
    the DAG id ``simple_transform`` and run.

    :param project_root_path: Project root directory containing ``project.yaml``.
    """
    af.set_project_config_file(project_root_path + '/project.yaml')

    # Command line job: local platform, cmd_line engine.
    shell_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=CMDEngine().engine())
    with af.config(shell_job_config):
        start_executor = CmdExecutor(cmd_line="echo Start AI flow")
        cmd_job = af.user_define_operation(executor=start_executor)

    # Python job: local platform, python engine, executed in BATCH mode.
    batch_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=PythonEngine.engine())
    batch_job_config.exec_mode = af.ExecutionMode.BATCH

    with af.config(batch_job_config):
        # Source and sink CSV files sit in the same directory as this file.
        here = os.path.dirname(os.path.abspath(__file__))
        source_path = here + '/source_data.csv'
        sink_path = here + '/sink_data.csv'

        # Register the input example in the metadata service, then read it.
        # The CSV has no header row, so column names are supplied explicitly.
        source_meta = af.register_example(
            name='read_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_format='csv',
            data_type='pandas',
            batch_uri=source_path,
        )
        read_args = ExecuteArgs(batch_properties=Args(header=None, names=["a", "b", "c"]))
        read_example_channel = af.read_example(example_info=source_meta, exec_args=read_args)

        # Apply the user-defined transform to what was read.
        transform_executor = PythonObjectExecutor(python_object=SimpleTransform())
        transformed = af.transform(input_data_list=[read_example_channel], executor=transform_executor)

        # Register the output example and write the transformed data to it.
        sink_meta = af.register_example(
            name='write_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_format='csv',
            data_type='pandas',
            batch_uri=sink_path,
        )
        write_args = ExecuteArgs(batch_properties=Args(sep=',', header=False, index=False))
        sink_channel = af.write_example(input_data=transformed, example_info=sink_meta, exec_args=write_args)

    # Control dependency between the read job and the command line job
    # (the read is meant to start once the command line job has finished).
    af.stop_before_control_dependency(read_example_channel, cmd_job)

    dag_name = 'simple_transform'
    af.deploy_to_airflow(project_root_path, dag_id=dag_name)
    run_context = af.run(
        project_path=project_root_path,
        dag_id=dag_name,
        scheduler_type=SchedulerType.AIRFLOW,
    )
def run_workflow():
    """Deploy the example workflow as an Airflow DAG and trigger it once.

    Relies on the module-level ``build_workflow`` function and ``project_path``
    value. The DAG is deployed with a cron schedule of every 10 minutes and the
    current UTC time as its start date, then one run is forced and the id of
    that DAG run is printed.
    """
    dag_name = 'airflow_dag_example'

    build_workflow()
    af.set_project_config_file(project_path + '/project.yaml')

    # default_args is handed over as the textual form of a dict literal:
    # the cron expression keeps its own quotes, the start date is quoted here.
    cron_literal = "'*/10 * * * *'"
    default_args = "{{'schedule_interval': {}, 'start_date': '{}'}}".format(
        cron_literal, str(datetime.utcnow()))
    af.deploy_to_airflow(project_path, dag_id=dag_name, default_args=default_args)

    # Force a single run right away, independent of the schedule.
    context = af.run(project_path=project_path,
                     dag_id=dag_name,
                     scheduler_type=SchedulerType.AIRFLOW)
    print(context.dagrun_id)
Esempio n. 3
0
 def run_airflow_dag_function(self):
     """Deploy a one-task command line DAG, trigger it, then stop the scheduler.

     The DAG ``test_dag_111`` has no schedule interval, so it only runs when
     triggered explicitly here. After the run is triggered its id is printed,
     the method sleeps for five seconds and finally sends a
     ``StopSchedulerEvent`` through the notification client.
     """
     from datetime import datetime

     dag_name = 'test_dag_111'
     ns_client = NotificationClient(server_uri='localhost:50051')

     # Define the single task under the 'task_1' job config.
     with af.global_config_file(test_util.get_workflow_config_file()):
         with af.config('task_1'):
             echo_executor = CmdExecutor(cmd_line=['echo "hello world!"'])
             cmd_executor = af.user_define_operation(output_num=0, executor=echo_executor)

     dag_args = {
         'schedule_interval': None,
         'start_date': datetime(2025, 12, 1),
     }
     af.deploy_to_airflow(test_util.get_project_path(), dag_id=dag_name, default_args=dag_args)

     context = af.run(
         project_path=test_util.get_project_path(),
         dag_id=dag_name,
         scheduler_type=SchedulerType.AIRFLOW,
     )
     print(context.dagrun_id)

     # Pause before shutting the scheduler down
     # (presumably to let the DAG file be parsed -- confirm).
     time.sleep(5)
     stop_event = StopSchedulerEvent(job_id=0).to_event()
     ns_client.send_event(stop_event)
Esempio n. 4
0
def run_project(project_root_path):
    """Assemble the stream train / validate / push / predict workflow and run it on Airflow.

    Four jobs are declared under the configs ``train_job``, ``validate_job``,
    ``push_job`` and ``predict_job`` taken from
    ``resources/workflow_config.yaml``. All of them share one registered
    model. The validate and push jobs are tied to model-version events
    (``MODEL_GENERATED`` and ``MODEL_VALIDATED`` respectively). The workflow
    is deployed with the project name as the DAG id and then run.

    :param project_root_path: Project root directory containing ``project.yaml``
        and ``resources/workflow_config.yaml``.
    """
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    # Every registered example/model/artifact name is namespaced by project.
    prefix = project_name + "."

    on_validate = af.external_trigger(name='validate')
    on_push = af.external_trigger(name='push')

    workflow_config = project_root_path + '/resources/workflow_config.yaml'
    with af.global_config_file(workflow_config):
        with af.config('train_job'):
            # Read and preprocess the training stream.
            train_example = af.register_example(
                name=prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('train'),
            )
            train_reader = PythonObjectExecutor(python_object=TrainExampleReader())
            train_read_example = af.read_example(example_info=train_example, executor=train_reader)
            train_preprocess = PythonObjectExecutor(python_object=TrainExampleTransformer())
            train_transform = af.transform(input_data_list=[train_read_example], executor=train_preprocess)

            # Register the model shared by all later jobs, then train it.
            train_model = af.register_model(
                model_name=prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model',
            )
            trainer = PythonObjectExecutor(python_object=ModelTrainer())
            train_channel = af.train(
                input_data_list=[train_transform],
                executor=trainer,
                model_info=train_model,
            )

        with af.config('validate_job'):
            # Read and preprocess the evaluation stream.
            validate_example = af.register_example(
                name=prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('evaluate'),
                data_format='npz',
            )
            validate_reader = PythonObjectExecutor(python_object=ValidateExampleReader())
            validate_read_example = af.read_example(example_info=validate_example, executor=validate_reader)
            validate_preprocess = PythonObjectExecutor(python_object=ValidateTransformer())
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=validate_preprocess)

            # Register the artifact for the validation result; the validator
            # is given the artifact's name.
            validate_artifact_name = prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(
                name=validate_artifact_name,
                stream_uri=get_file_dir(__file__) + '/validate_result',
            )
            validator = PythonObjectExecutor(python_object=ModelValidator(validate_artifact_name))
            validate_channel = af.model_validate(
                input_data_list=[validate_transform],
                model_info=train_model,
                executor=validator,
            )

        with af.config('push_job'):
            # Register the artifact for the pushed model; the pusher is
            # given the artifact's name.
            push_model_artifact_name = prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(
                name=push_model_artifact_name,
                stream_uri=get_file_dir(__file__) + '/pushed_model',
            )
            pusher = PythonObjectExecutor(python_object=ModelPusher(push_model_artifact_name))
            push_channel = af.push_model(model_info=train_model, executor=pusher)

        with af.config('predict_job'):
            # Read and preprocess the prediction stream, then predict.
            predict_example = af.register_example(
                name=prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'),
            )
            predict_reader = PythonObjectExecutor(python_object=PredictExampleReader())
            predict_read_example = af.read_example(example_info=predict_example, executor=predict_reader)
            predict_preprocess = PythonObjectExecutor(python_object=PredictTransformer())
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=predict_preprocess)
            predictor = PythonObjectExecutor(python_object=ModelPredictor())
            predict_channel = af.predict(
                input_data_list=[predict_transform],
                model_info=train_model,
                executor=predictor,
            )

            # Write the predictions out through a registered export example.
            export_example = af.register_example(
                name=prefix + 'export_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result',
            )
            result_writer = PythonObjectExecutor(python_object=ExampleWriter())
            af.write_example(input_data=predict_channel,
                             example_info=export_example,
                             executor=result_writer)

        # Validation depends on MODEL_GENERATED events for the trained model.
        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=on_validate,
            model_name=train_model.name,
        )

        # Pushing depends on MODEL_VALIDATED events for the trained model.
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=on_push,
            model_name=train_model.name,
        )

    # Deploy under the project name and run on the Airflow scheduler.
    dag_name = project_name
    af.deploy_to_airflow(project_root_path, dag_id=dag_name)
    af.run(project_path=project_root_path,
           dag_id=dag_name,
           scheduler_type=SchedulerType.AIRFLOW)
Esempio n. 5
0
def run_project(project_root_path):
    """Assemble the batch-train / stream-predict workflow and run it on Airflow.

    Four jobs are declared under the configs ``train_job``, ``validate_job``,
    ``push_job`` and ``predict_job`` taken from
    ``resources/workflow_config.yaml``. Training reads a batch example, while
    prediction reads and writes stream examples. All jobs share one
    registered model. The workflow is deployed with the project name as the
    DAG id and then run.

    :param project_root_path: Project root directory containing ``project.yaml``
        and ``resources/workflow_config.yaml``.
    """
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    # Every registered example/model/artifact name is namespaced by project.
    prefix = project_name + "."

    on_validate = af.external_trigger(name='validate')
    on_push = af.external_trigger(name='push')

    workflow_config = project_root_path + '/resources/workflow_config.yaml'
    with af.global_config_file(workflow_config):
        # The train job is configured as a periodic job: it is expected to
        # run every `interval` seconds as defined in workflow_config.yaml.
        with af.config('train_job'):
            # Register the raw training data (batch) and read it.
            train_example = af.register_example(
                name=prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_BATCH,
                batch_uri=EXAMPLE_URI.format('train'),
            )
            train_reader = PythonObjectExecutor(python_object=ExampleReader())
            train_read_example = af.read_example(example_info=train_example, executor=train_reader)

            # Preprocess the training data.
            train_preprocess = PythonObjectExecutor(python_object=ExampleTransformer())
            train_transform = af.transform(input_data_list=[train_read_example], executor=train_preprocess)

            # Register the model shared by all later jobs, then train it.
            train_model = af.register_model(
                model_name=prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model',
            )
            trainer = PythonObjectExecutor(python_object=ModelTrainer())
            train_channel = af.train(
                input_data_list=[train_transform],
                executor=trainer,
                model_info=train_model,
            )

        with af.config('validate_job'):
            # Read the validation dataset and validate the model before it
            # is used for prediction.
            # NOTE(review): support_type is EXAMPLE_STREAM but the location is
            # passed as batch_uri -- confirm this pairing is intentional.
            validate_example = af.register_example(
                name=prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                batch_uri=EXAMPLE_URI.format('evaluate'),
            )
            validate_reader = PythonObjectExecutor(python_object=ValidateExampleReader())
            validate_read_example = af.read_example(example_info=validate_example, executor=validate_reader)
            validate_preprocess = PythonObjectExecutor(python_object=ValidateTransformer())
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=validate_preprocess)

            # Register the artifact for the validation result; the validator
            # is given the artifact's name.
            validate_artifact_name = prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(
                name=validate_artifact_name,
                batch_uri=get_file_dir(__file__) + '/validate_result',
            )
            validator = PythonObjectExecutor(python_object=ModelValidator(validate_artifact_name))
            validate_channel = af.model_validate(
                input_data_list=[validate_transform],
                model_info=train_model,
                executor=validator,
            )

        with af.config('push_job'):
            # Push the model to serving: register metadata of the pushed
            # model, then hand the artifact name to the pusher.
            push_model_artifact_name = prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(
                name=push_model_artifact_name,
                batch_uri=get_file_dir(__file__) + '/pushed_model',
            )
            pusher = PythonObjectExecutor(python_object=ModelPusher(push_model_artifact_name))
            push_channel = af.push_model(model_info=train_model, executor=pusher)

        with af.config('predict_job'):
            # Prediction (inference) over the prediction stream.
            predict_example = af.register_example(
                name=prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'),
            )
            predict_reader = PythonObjectExecutor(python_object=PredictExampleReader())
            predict_read_example = af.read_example(example_info=predict_example, executor=predict_reader)
            predict_preprocess = PythonObjectExecutor(python_object=PredictTransformer())
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=predict_preprocess)
            predictor = PythonObjectExecutor(python_object=ModelPredictor())
            predict_channel = af.predict(
                input_data_list=[predict_transform],
                model_info=train_model,
                executor=predictor,
            )

            # Save the prediction result.
            result_example = af.register_example(
                name=prefix + 'write_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result',
            )
            result_writer = PythonObjectExecutor(python_object=ExampleWriter())
            af.write_example(input_data=predict_channel,
                             example_info=result_example,
                             executor=result_writer)

        # Control edges of the workflow graph: the validator is tied to
        # MODEL_GENERATED events and the pusher to MODEL_VALIDATED events of
        # the trained model. Intended flow: each finished training round
        # launches validation, and a validated model launches the push.
        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=on_validate,
            model_name=train_model.name,
        )
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=on_push,
            model_name=train_model.name,
        )

    # Deploy under the project name and run on the Airflow scheduler.
    dag_name = project_name
    af.deploy_to_airflow(project_root_path, dag_id=dag_name)
    af.run(project_path=project_root_path,
           dag_id=dag_name,
           scheduler_type=SchedulerType.AIRFLOW)