Example #1
def run_flink_python_job():
    with af.global_config_file(test_util.get_job_config_file()):
        with af.config('vvp_python_job'):
            faf.vvp_job()
    workflow_id = af.run(test_util.get_project_path(),
                         dag_id='wordcount_vvp_python',
                         scheduler_type=af.SchedulerType.AIRFLOW)
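Example #1 only submits the run; to block until it finishes, the same calls used in Examples #3 and #9 can be appended inside run_flink_python_job():

    res = af.wait_workflow_execution_finished(workflow_id)
    print(res)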
Example #2
    def test_stream_transform_component(self):
        file = get_file_dir(__file__) + '/test1.csv'
        input_example_meta = af.register_example(
            name='test_example',
            support_type=ExampleSupportType.EXAMPLE_BOTH,
            stream_uri=file)
        output_file = get_file_dir(
            __file__) + "/output_transform_stream_test1.csv"
        output_example_meta = af.register_example(
            name='test_example_output',
            support_type=ExampleSupportType.EXAMPLE_BOTH,
            stream_uri=output_file)
        with af.config(
                af.BaseJobConfig(platform='local',
                                 engine='python',
                                 job_name='stream_transform')):
            input_example = af.read_example(
                example_info=input_example_meta,
                executor=PythonObjectExecutor(
                    python_object=ReadStreamExample()))
            transform_example = af.transform(
                input_data_list=[input_example],
                executor=PythonObjectExecutor(
                    python_object=TransformStreamData()))

            af.write_example(input_data=transform_example,
                             example_info=output_example_meta.name,
                             executor=PythonObjectExecutor(
                                 python_object=WriteStreamExample()))
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)
Example #3
def run_flink_job():
    with af.global_config_file(test_util.get_job_config_file()):
        with af.config('vvp_job'):
            faf.vvp_job()
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    print(res)
Example #4
 def test_read_example_with_numpy_npy(self):
     npy_name = 'test.npy'
     np.save(file=npy_name, arr=np.arange(10))
     input_example_meta = af.register_example(
         name='input_numpy_example',
         data_type='numpy',
         data_format='npy',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + "/" + npy_name))
     output_example_meta = af.register_example(
         name='output_numpy_example',
         data_type='numpy',
         data_format='npy',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + '/numpy_output.npy'))
     with af.config(
             af.BaseJobConfig(platform='local',
                              engine='python',
                              job_name='test_npy')):
         example_channel = af.read_example(example_info=input_example_meta)
         af.write_example(input_data=example_channel,
                          example_info=output_example_meta)
     workflow_id = af.run(test_util.get_project_path())
     res = af.wait_workflow_execution_finished(workflow_id)
     self.assertEqual(0, res)
Example #5
 def test_read_example_with_pandas(self):
     input_example_meta = af.register_example(
         name='input_pandas_example',
         data_type='pandas',
         data_format='csv',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + '/test1.csv'))
     output_example_meta = af.register_example(
         name='output_pandas_example',
         data_type='pandas',
         data_format='csv',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + '/pandas_output.csv'))
     with af.config(
             af.BaseJobConfig(platform='local',
                              engine='python',
                              job_name='test_csv')):
         example_channel = af.read_example(example_info=input_example_meta)
         af.write_example(input_data=example_channel,
                          example_info=output_example_meta)
     workflow_id = af.run(test_util.get_project_path())
     res = af.wait_workflow_execution_finished(workflow_id)
     self.assertEqual(0, res)
Example #6
    def test_batch_train_component_with_an_output(self):
        input_example_meta = af.register_example(
            name='batch_train_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH)
        model_meta = af.register_model(model_name='mnist_model',
                                       model_type=ModelType.SAVED_MODEL)

        example_meta = af.register_example(
            name='output_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_type='numpy',
            data_format='npz',
            batch_uri=os.path.abspath(
                os.path.dirname(__file__) + '/numpy_output.npz'))
        with af.config(
                af.BaseJobConfig(platform='local',
                                 engine='python',
                                 job_name='batch_train')):
            input_example = af.read_example(
                example_info=input_example_meta,
                executor=PythonObjectExecutor(
                    python_object=ReadBatchExample()))
            train_channel = af.train(
                input_data_list=[input_example],
                executor=PythonObjectExecutor(
                    python_object=TrainBatchMnistModelWithOutput()),
                model_info=model_meta,
                output_num=1)
            af.write_example(input_data=train_channel,
                             example_info=example_meta)
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)
Example #7
def run_workflow():
    build_workflow()
    af.set_project_config_file(project_path + '/project.yaml')
    res = af.run(project_path,
                 dag_id='repeated_dag_example',
                 scheduler_type=SchedulerType.AIFLOW)
    af.wait_workflow_execution_finished(res)
Example #8
 def test_stream_train_component(self):
     batch_input_example_meta = af.register_example(
         name='stream_train_example',
         support_type=ExampleSupportType.EXAMPLE_BOTH)
     model_meta = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)
     stream_input_example_meta = af.register_example(
         name='stream_train_example',
         support_type=ExampleSupportType.EXAMPLE_BOTH)
     with af.config(
             af.BaseJobConfig(platform='local',
                              engine='python',
                              job_name='stream_train')):
         batch_input_example = af.read_example(
             example_info=batch_input_example_meta,
             executor=PythonObjectExecutor(
                 python_object=ReadBatchExample()))
         batch_train = af.train(input_data_list=[batch_input_example],
                                executor=PythonObjectExecutor(
                                    python_object=TrainBatchMnistModel()),
                                model_info=model_meta)
         stream_input_example = af.read_example(
             example_info=stream_input_example_meta,
             executor=PythonObjectExecutor(
                 python_object=ReadStreamExample()))
         stream_train = af.train(input_data_list=[stream_input_example],
                                 executor=PythonObjectExecutor(
                                     python_object=TrainStreamMnistModel()),
                                 model_info=model_meta)
     af.stop_before_control_dependency(stream_train, batch_train)
     workflow_id = af.run(test_util.get_project_path())
     res = af.wait_workflow_execution_finished(workflow_id)
     self.assertEqual(0, res)
Example #9
def run_flink_spec_job():
    with af.global_config_file(test_util.get_job_config_file()):
        with af.config('vvp_spec_job'):
            faf.vvp_job()
    workflow_id = af.run(test_util.get_project_path(),
                         dag_id='wordcount_vvp_python',
                         scheduler_type=af.SchedulerType.AIFLOW)
    res = af.wait_workflow_execution_finished(workflow_id)
    print(res)
Example #10
 def test_deploy_airflow(self):
     airflow_path = af.project_config().get_airflow_deploy_path()
     if not os.path.exists(airflow_path):
         os.makedirs(airflow_path)
     with af.config(LocalPythonJobConfig(job_name="simple")):
         op = af.user_define_operation(
             af.PythonObjectExecutor(SimpleExecutor()))
     res = af.run(test_util.get_project_path())
     af.wait_workflow_execution_finished(res)
Example #11
def run_project(project_root_path):

    af.set_project_config_file(project_root_path + '/project.yaml')
    # Configure the command line job: platform is local and engine is cmd_line
    cmd_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=CMDEngine().engine())
    with af.config(cmd_job_config):
        # Command line job executor
        cmd_job = af.user_define_operation(executor=CmdExecutor(cmd_line="echo Start AI flow"))

    # Configure the Python job: platform is local and engine is python
    python_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=PythonEngine.engine())

    # Set the execution mode of this Python job to BATCH,
    # which indicates that jobs using this config run in batch mode.
    python_job_config.exec_mode = af.ExecutionMode.BATCH

    with af.config(python_job_config):
        # Path of the source data (under the '..../simple_transform_airflow' dir)
        source_path = os.path.dirname(os.path.abspath(__file__)) + '/source_data.csv'
        # Path of Sink data
        sink_path = os.path.dirname(os.path.abspath(__file__)) + '/sink_data.csv'

        # To keep the data source replaceable, we register the example in the metadata service
        read_example_meta = af.register_example(name='read_example', support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                data_format='csv', data_type='pandas', batch_uri=source_path)

        # Read training example using af.read_example()
        # example_info is the meta information of the example
        read_example_channel = af.read_example(example_info=read_example_meta, exec_args=ExecuteArgs(
            batch_properties=Args(header=None, names=["a", "b", "c"])))

        # Transform examples using af.transform()
        transform_channel = af.transform(input_data_list=[read_example_channel],
                                         executor=PythonObjectExecutor(python_object=SimpleTransform()))

        write_example_meta = af.register_example(name='write_example', support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                 data_format='csv', data_type='pandas', batch_uri=sink_path)

        # Write example to specific path
        write = af.write_example(input_data=transform_channel, example_info=write_example_meta,
                                 exec_args=ExecuteArgs(batch_properties=Args(sep=',', header=False, index=False)))

    # Add a control dependency: the read_example job will start right after the command line job finishes.
    af.stop_before_control_dependency(read_example_channel, cmd_job)

    transform_dag = 'simple_transform'
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)
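The transform step above delegates its logic to SimpleTransform, which is defined elsewhere in the project. Below is a minimal sketch of what such an executor might look like; the execute(function_context, input_list) signature is an assumption about the project's executor interface and should be checked against the actual Executor base class:

class SimpleTransformSketch:
    """Hypothetical stand-in for SimpleTransform, shown only for illustration."""

    def execute(self, function_context, input_list):
        # input_list[0] is assumed to hold the pandas DataFrame produced by read_example
        df = input_list[0]
        df = df.dropna()  # an arbitrary preprocessing step
        # the returned list feeds the downstream write_example job
        return [df]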
Example #12
    def test_stream_evaluate_component(self):
        input_example_meta = af.register_example(
            name='batch_train_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH)
        model_meta = af.register_model(model_name='mnist_model',
                                       model_type=ModelType.SAVED_MODEL)
        stream_evaluate_example_meta = af.register_example(
            name='stream_evaluate_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM)
        stream_output_file = get_file_dir(__file__) + '/stream_evaluate'
        evaluate_output = af.register_artifact(name='stream_evaluate',
                                               stream_uri=stream_output_file)
        stream_evaluate_result_example_meta = af.register_example(
            name='stream_evaluate_result_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM,
            stream_uri=stream_output_file)
        if os.path.exists(stream_output_file):
            os.remove(stream_output_file)
        with af.config(
                af.BaseJobConfig(platform='local',
                                 engine='python',
                                 job_name='stream_evaluate')):
            input_example = af.read_example(
                example_info=input_example_meta,
                executor=PythonObjectExecutor(
                    python_object=ReadBatchExample()))

            batch_train = af.train(input_data_list=[input_example],
                                   executor=PythonObjectExecutor(
                                       python_object=TrainBatchMnistModel()),
                                   model_info=model_meta)
            stream_evaluate_example = af.read_example(
                example_info=stream_evaluate_example_meta,
                executor=PythonObjectExecutor(
                    python_object=ReadStreamExample()))
            stream_evaluate = af.evaluate(
                input_data_list=[stream_evaluate_example],
                model_info=model_meta,
                executor=PythonObjectExecutor(
                    python_object=EvaluateStreamMnistModel()),
                output_num=1)
            af.write_example(input_data=stream_evaluate,
                             example_info=stream_evaluate_result_example_meta,
                             executor=PythonObjectExecutor(
                                 python_object=WriteStreamExample()))
        af.stop_before_control_dependency(stream_evaluate, batch_train)
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)
Example #13
def run_workflow():
    build_workflow()
    af.set_project_config_file(project_path + '/project.yaml')

    # Deploy a workflow that is scheduled every 10 minutes
    default_args = '{\'schedule_interval\': %s, \'start_date\': \'%s\'}' % (
        '\'*/10 * * * *\'', datetime.utcnow())
    af.deploy_to_airflow(project_path,
                         dag_id='airflow_dag_example',
                         default_args=default_args)

    # Force trigger once
    context = af.run(project_path=project_path,
                     dag_id='airflow_dag_example',
                     scheduler_type=SchedulerType.AIRFLOW)
    print(context.dagrun_id)
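The default_args above is assembled as an escaped string. Example #19 below passes default_args as a plain dict; assuming deploy_to_airflow accepts that form here as well, the same schedule can be written more readably as:

    default_args = {
        'schedule_interval': '*/10 * * * *',  # every 10 minutes
        'start_date': datetime.utcnow(),
    }
    af.deploy_to_airflow(project_path,
                         dag_id='airflow_dag_example',
                         default_args=default_args)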
Example #14
    def test_run_pyflink_job(self):
        project_path = os.path.dirname(__file__) + '/../'
        af.set_project_config_file(project_path + "project.yaml")
        input_file = get_parent_dir(
            get_file_dir(__file__)) + '/resources/word_count.txt'
        output_file = get_file_dir(__file__) + "/word_count_output.csv"
        if os.path.exists(output_file):
            os.remove(output_file)

        example_1 = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=input_file,
            stream_uri=input_file,
            data_format="csv")

        example_2 = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=output_file,
            stream_uri=output_file,
            data_format="csv")
        flink_config = faf.LocalFlinkJobConfig()
        flink_config.local_mode = 'cluster'
        flink_config.flink_home = '/Users/chenwuchao/soft/apache/flink-1.11.0/'
        flink_config.set_table_env_create_func(TableEnvCreator())
        with af.config(flink_config):
            input_example = af.read_example(
                example_info=example_1,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))
            processed = af.transform(
                input_data_list=[input_example],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))

            af.write_example(input_data=processed,
                             example_info=example_2,
                             executor=faf.flink_executor.FlinkPythonExecutor(
                                 python_object=Sink()))
        workflow_id = af.run(project_path)
        res = af.wait_workflow_execution_finished(workflow_id)
Example #15
 def test_read_example_with_numpy_npz(self):
     npy_name = 'test.npz'
     np.savez(npy_name, np.arange(10), np.sin(np.arange(10)))
     input_example_meta = af.register_example(
         name='input_numpy_example',
         data_type='numpy',
         data_format='npz',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + "/" + npy_name))
     output_example_meta_first = af.register_example(
         name='output_numpy_example_1',
         data_type='numpy',
         data_format='npz',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + '/numpy_output_1.npz'))
     output_example_meta_second = af.register_example(
         name='output_numpy_example_2',
         data_type='numpy',
         data_format='npz',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         batch_uri=os.path.abspath(
             os.path.dirname(__file__) + '/numpy_output_2.npz'))
     with af.config(
             af.BaseJobConfig(platform='local',
                              engine='python',
                              job_name='test_npz')):
         example_channel = af.read_example(example_info=input_example_meta)
         transform_channel = af.transform(
             input_data_list=[example_channel],
             executor=PythonObjectExecutor(
                 python_object=TransformTrainData()),
             output_num=2)
         af.write_example(input_data=transform_channel[0],
                          example_info=output_example_meta_first)
         af.write_example(input_data=transform_channel[1],
                          example_info=output_example_meta_second)
     workflow_id = af.run(test_util.get_project_path())
     res = af.wait_workflow_execution_finished(workflow_id)
     self.assertEqual(0, res)
Example #16
 def test_batch_predict_component(self):
     input_example_meta = af.register_example(
         name='input_train_example',
         support_type=ExampleSupportType.EXAMPLE_BOTH)
     model_meta = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)
     batch_output_file = get_file_dir(__file__) + '/batch_predict'
     evaluate_output = af.register_artifact(name='batch_evaluate',
                                            batch_uri=batch_output_file)
     output_example_meta = af.register_example(
         name='output_result_example',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         data_type='numpy',
         data_format='txt',
         batch_uri=batch_output_file)
     if os.path.exists(batch_output_file):
         os.remove(batch_output_file)
     with af.config(
             af.BaseJobConfig(platform='local',
                              engine='python',
                              job_name='batch_predict')):
         batch_example = af.read_example(
             example_info=input_example_meta,
             executor=PythonObjectExecutor(
                 python_object=ReadBatchExample()))
         batch_train = af.train(input_data_list=[batch_example],
                                executor=PythonObjectExecutor(
                                    python_object=TrainBatchMnistModel()),
                                model_info=model_meta)
         batch_predict = af.predict(
             input_data_list=[batch_example],
             model_info=model_meta,
             executor=PythonObjectExecutor(
                 python_object=PredictBatchMnistModel()),
             output_num=1)
         af.write_example(input_data=batch_predict,
                          example_info=output_example_meta)
     af.stop_before_control_dependency(batch_predict, batch_train)
     workflow_id = af.run(test_util.get_project_path())
     res = af.wait_workflow_execution_finished(workflow_id)
     self.assertEqual(0, res)
Example #17
 def test_batch_train_component(self):
     input_example_meta = af.register_example(
         name='batch_train_example',
         support_type=ExampleSupportType.EXAMPLE_BATCH)
     model_meta = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)
     with af.config(
             af.BaseJobConfig(platform='local',
                              engine='python',
                              job_name='batch_train')):
         input_example = af.read_example(
             example_info=input_example_meta,
             executor=PythonObjectExecutor(
                 python_object=ReadBatchExample()))
         af.train(input_data_list=[input_example],
                  executor=PythonObjectExecutor(
                      python_object=TrainBatchMnistModel()),
                  model_info=model_meta)
     workflow_id = af.run(test_util.get_project_path())
     res = af.wait_workflow_execution_finished(workflow_id)
     self.assertEqual(0, res)
Example #18
    def test_batch_model_validate(self):
        input_example_meta = af.register_example(name='batch_train_example',
                                                 support_type=ExampleSupportType.EXAMPLE_BOTH)
        model_meta = af.register_model(model_name='mnist_model',
                                       model_type=ModelType.SAVED_MODEL)
        with af.config(af.BaseJobConfig(platform='local', engine='python', job_name='evaluate')):
            input_example = af.read_example(example_info=input_example_meta,
                                            executor=PythonObjectExecutor(python_object=ReadBatchExample()))

            batch_train = af.train(input_data_list=[input_example],
                                   executor=PythonObjectExecutor(python_object=TrainBatchMnistModel()),
                                   model_info=model_meta)

            model_validate = af.model_validate(input_data_list=[input_example],
                                               model_info=model_meta,
                                               executor=PythonObjectExecutor(python_object=BatchModelValidate()),
                                               output_num=0)
        af.stop_before_control_dependency(model_validate, batch_train)
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)
Example #19
 def run_airflow_dag_function(self):
     # wait until the DAG file has been parsed
     from datetime import datetime
     ns_client = NotificationClient(server_uri='localhost:50051')
     with af.global_config_file(test_util.get_workflow_config_file()):
         with af.config('task_1'):
             cmd_executor = af.user_define_operation(
                 output_num=0,
                 executor=CmdExecutor(cmd_line=['echo "hello world!"']))
     af.deploy_to_airflow(test_util.get_project_path(),
                          dag_id='test_dag_111',
                          default_args={
                              'schedule_interval': None,
                              'start_date': datetime(2025, 12, 1),
                          })
     context = af.run(project_path=test_util.get_project_path(),
                      dag_id='test_dag_111',
                      scheduler_type=SchedulerType.AIRFLOW)
     print(context.dagrun_id)
     time.sleep(5)
     ns_client.send_event(StopSchedulerEvent(job_id=0).to_event())
Example #20
    def test_run_pyflink_job(self):
        input_file = get_parent_dir(
            get_file_dir(__file__)) + '/resources/word_count.txt'
        output_file = get_file_dir(__file__) + "/word_count_output.csv"
        if os.path.exists(output_file):
            os.remove(output_file)

        example_1 = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=input_file,
            stream_uri=input_file,
            data_format="csv")

        example_2 = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=output_file,
            stream_uri=output_file,
            data_format="csv")
        flink_config = faf.LocalFlinkJobConfig()
        flink_config.local_mode = 'python'
        flink_config.set_table_env_create_func(TableEnvCreator())
        with af.config(flink_config):
            input_example = af.read_example(
                example_info=example_1,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))
            processed = af.transform(
                input_data_list=[input_example],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))

            af.write_example(input_data=processed,
                             example_info=example_2,
                             executor=faf.flink_executor.FlinkPythonExecutor(
                                 python_object=Sink()))
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)
Example #21
def run_project(project_root_path):

    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        # The train job is configured as a periodic job, which means it will
        # run every `interval` seconds (defined in workflow_config.yaml)
        with af.config('train_job'):
            # Register the raw training data (example) metadata and read the example (i.e. the training dataset)
            train_example = af.register_example(name=artifact_prefix + 'train_example',
                                                support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                batch_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(example_info=train_example,
                                                 executor=PythonObjectExecutor(python_object=ExampleReader()))

            # Transform (preprocess) the example
            train_transform = af.transform(input_data_list=[train_read_example],
                                           executor=PythonObjectExecutor(python_object=ExampleTransformer()))

            # Register model metadata and train model
            train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression',
                                            model_type=ModelType.SAVED_MODEL,
                                            model_desc='logistic regression model')
            train_channel = af.train(input_data_list=[train_transform],
                                     executor=PythonObjectExecutor(python_object=ModelTrainer()),
                                     model_info=train_model)
        with af.config('validate_job'):
            # Validate the model:
            # read the validation dataset and validate the model before it is used for prediction

            validate_example = af.register_example(name=artifact_prefix + 'validate_example',
                                                   support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                   batch_uri=EXAMPLE_URI.format('evaluate'))
            validate_read_example = af.read_example(example_info=validate_example,
                                                    executor=PythonObjectExecutor(
                                                        python_object=ValidateExampleReader()))
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(name=validate_artifact_name,
                                                     batch_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(input_data_list=[validate_transform],
                                                 model_info=train_model,
                                                 executor=PythonObjectExecutor(
                                                     python_object=ModelValidator(validate_artifact_name)))
        with af.config('push_job'):
            # Push model to serving
            # Register metadata of pushed model
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(name=push_model_artifact_name,
                                                       batch_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(model_info=train_model,
                                         executor=PythonObjectExecutor(
                                            python_object=ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            # Prediction (inference)
            predict_example = af.register_example(name=artifact_prefix + 'predict_example',
                                                  support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                  stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(example_info=predict_example,
                                                   executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(input_data_list=[predict_transform],
                                         model_info=train_model,
                                         executor=PythonObjectExecutor(python_object=ModelPredictor()))
            # Save prediction result
            write_example = af.register_example(name=artifact_prefix + 'write_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(input_data=predict_channel,
                             example_info=write_example,
                             executor=PythonObjectExecutor(python_object=ExampleWriter()))

        # Define the relation graph connected by control edges:
        # once a round of training is done, the validator is launched, and
        # the pusher is launched if the new model is better.
        # Prediction starts once the first round of training is done, and
        # whenever the pusher pushes (deploys) a new model, the predictor switches to the latest deployed model.
        af.model_version_control_dependency(src=validate_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
                                            dependency=validate_trigger, model_name=train_model.name)
        af.model_version_control_dependency(src=push_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
                                            dependency=push_trigger, model_name=train_model.name)

    # Run workflow
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
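Both this example and Example #24 read data through an EXAMPLE_URI template that is defined elsewhere in the project. A purely hypothetical definition, included only so the snippet can be read on its own:

import os

# Hypothetical value; the real template lives in the project's resources/constants.
EXAMPLE_URI = os.path.abspath(os.path.dirname(__file__)) + '/resources/{}_data.csv'
# EXAMPLE_URI.format('train') would then resolve to .../resources/train_data.csv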
Example #22
def run_workflow():
    """
    Run the user-defined workflow definition.
    """
    train_data_file, predict_result_directory, merge_predict_result_path, \
    first_test_data_file, first_result_data_file = collect_data_file()
    # Prepare workflow: Example & Model Metadata registration.
    train_example_meta, predict_result_meta, merge_data_meta, first_test_example_meta, second_test_example_meta, \
    first_result_example_meta, second_result_example_meta, train_model_meta = \
        prepare_workflow(train_data_file=train_data_file,
                         predict_result_directory=predict_result_directory,
                         merge_predict_result_path=merge_predict_result_path,
                         first_test_data_file=first_test_data_file,
                         first_result_data_file=first_result_data_file)

    # Save proxima indexes under the following index path.
    index_path = '{}/codes/{}/'.format(os.environ['ENV_HOME'], os.environ['TASK_ID']) + 'test.index'

    # Set Python job config to train model.
    python_job_config_0 = BaseJobConfig(platform='local', engine='python', job_name='train')

    python_job_config_1 = BaseJobConfig(platform='local', engine='python', job_name='start_cluster_serving')

    python_job_config_2 = BaseJobConfig(platform='local', engine='python', job_name='merge_predict_result')

    # Set Flink job config to predict with cluster serving
    global_job_config_1 = LocalFlinkJobConfig()
    global_job_config_1.local_mode = 'cluster'
    global_job_config_1.flink_home = os.environ['FLINK_HOME']
    global_job_config_1.job_name = 'cluster_serving'
    global_job_config_1.set_table_env_create_func(StreamTableEnvCreatorBuildIndex())

    # Set Flink job config to build index.
    global_job_config_2 = LocalFlinkJobConfig()
    global_job_config_2.local_mode = 'cluster'
    global_job_config_2.flink_home = os.environ['FLINK_HOME']
    global_job_config_2.job_name = 'build_index'
    global_job_config_2.set_table_env_create_func(StreamTableEnvCreator())

    # Set Flink job config for the find_sick job.
    global_job_config_3 = LocalFlinkJobConfig()
    global_job_config_3.local_mode = 'cluster'
    global_job_config_3.flink_home = os.environ['FLINK_HOME']
    global_job_config_3.job_name = 'find_sick'
    global_job_config_3.set_table_env_create_func(StreamTableEnvCreator())

    # Set Flink job config for the online_cluster job.
    global_job_config_4 = LocalFlinkJobConfig()
    global_job_config_4.local_mode = 'cluster'
    global_job_config_4.flink_home = os.environ['FLINK_HOME']
    global_job_config_4.job_name = 'online_cluster'
    global_job_config_4.set_table_env_create_func(StreamTableEnvCreator())

    with af.config(python_job_config_0):
        # Under the first job config, we construct the first job, which trains an auto_encoder model.
        python_job_0_read_train_example = af.read_example(example_info=train_example_meta,
                                                          executor=PythonObjectExecutor(python_object=ReadCsvExample()))
        python_job_0_train_model = af.train(input_data_list=[python_job_0_read_train_example],
                                            executor=PythonObjectExecutor(python_object=TrainAutoEncoder()),
                                            model_info=train_model_meta,
                                            name='trainer_0')

    with af.config(python_job_config_1):
        python_job_1_cluster_serving_channel = af.cluster_serving(model_info=train_model_meta, parallelism=2)
        # python_job_1_cluster_serving_channel = af.cluster_serving(model_info=train_model_meta, parallelism=16)

    with af.config(global_job_config_1):
        flink_job_0_read_train_example = af.read_example(example_info=train_example_meta,
                                                         executor=FlinkPythonExecutor(python_object=ReadTrainExample()))
        flink_job_0_predict_model = af.predict(input_data_list=[flink_job_0_read_train_example],
                                               model_info=train_model_meta,
                                               executor=FlinkPythonExecutor(
                                                   python_object=PredictAutoEncoderWithTrain()))
        flink_job_0_write_predict_data = af.write_example(input_data=flink_job_0_predict_model,
                                                          example_info=predict_result_meta,
                                                          executor=FlinkPythonExecutor(
                                                              python_object=WritePredictResult()))

    with af.config(python_job_config_2):
        python_job_2_merge_train_data_file = af.user_define_operation(executor=PythonObjectExecutor(
            python_object=MergePredictResult()))

    with af.config(global_job_config_2):
        flink_job_1_read_train_example = af.read_example(example_info=merge_data_meta,
                                                         executor=FlinkPythonExecutor(python_object=ReadMergeExample()))
        flink_job_1_build_index_channel = af.transform([flink_job_1_read_train_example],
                                                       executor=FlinkPythonExecutor(
                                                           python_object=BuildIndexExecutor(index_path, FloatDataType(),
                                                                                            128)))

    with af.config(global_job_config_3):
        flink_job_2_read_history_example = af.read_example(example_info=first_test_example_meta,
                                                           executor=FlinkPythonExecutor(
                                                               python_object=ReadPredictExample()))
        flink_job_2_predict_model = af.predict(input_data_list=[flink_job_2_read_history_example],
                                               model_info=train_model_meta,
                                               executor=FlinkPythonExecutor(python_object=PredictAutoEncoder()))
        flink_job_2_transformed_data = af.transform([flink_job_2_predict_model],
                                                    executor=FlinkPythonExecutor(
                                                        python_object=SearchExecutor(index_path, FloatDataType(), 2)))
        flink_job_2_read_train_example = af.read_example(example_info=train_example_meta,
                                                         executor=FlinkPythonExecutor(python_object=ReadTrainExample()))
        flink_job_2_join_channel = af.transform(
            input_data_list=[flink_job_2_transformed_data, flink_job_2_read_train_example],
            executor=FlinkPythonExecutor(python_object=FindHistory()))
        flink_job_2_write_result = af.write_example(input_data=flink_job_2_join_channel,
                                                    example_info=first_result_example_meta,
                                                    executor=FlinkPythonExecutor(python_object=SearchSink()))

    with af.config(global_job_config_4):
        flink_job_3_read_online_example = af.read_example(example_info=second_test_example_meta,
                                                    executor=FlinkPythonExecutor(
                                                        python_object=ReadOnlinePredictExample()))
        flink_job_3_predict_model = af.predict(input_data_list=[flink_job_3_read_online_example],
                                         model_info=train_model_meta,
                                         executor=FlinkPythonExecutor(python_object=OnlinePredictAutoEncoder()))
        flink_job_3_transformed_data = af.transform([flink_job_3_predict_model],
                                              executor=FlinkPythonExecutor(
                                                  python_object=SearchExecutor3(index_path, FloatDataType(), 2)))
        af.write_example(input_data=flink_job_3_transformed_data,
                         example_info=second_result_example_meta,
                         executor=FlinkPythonExecutor(python_object=WriteSecondResult()))

    af.stop_before_control_dependency(python_job_1_cluster_serving_channel, python_job_0_train_model)
    af.stop_before_control_dependency(flink_job_0_read_train_example, python_job_1_cluster_serving_channel)
    af.stop_before_control_dependency(python_job_2_merge_train_data_file, flink_job_0_read_train_example)
    af.stop_before_control_dependency(flink_job_1_build_index_channel, python_job_2_merge_train_data_file)
    af.stop_before_control_dependency(flink_job_2_read_history_example, flink_job_1_build_index_channel)
    af.stop_before_control_dependency(flink_job_3_read_online_example, flink_job_2_write_result)
    workflow_id = af.run(get_project_path()+'/')
    res = af.wait_workflow_execution_finished(workflow_id)
    sys.exit(res)
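The four LocalFlinkJobConfig blocks above differ only in job_name and the table-environment creator. A small helper along these lines (hypothetical, shown only to make the repetition explicit) could build them:

def make_cluster_flink_config(job_name, table_env_creator):
    # Same settings as global_job_config_1..4 above, parameterised by name and creator.
    cfg = LocalFlinkJobConfig()
    cfg.local_mode = 'cluster'
    cfg.flink_home = os.environ['FLINK_HOME']
    cfg.job_name = job_name
    cfg.set_table_env_create_func(table_env_creator)
    return cfg

# e.g. global_job_config_3 = make_cluster_flink_config('find_sick', StreamTableEnvCreator())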
Example #23
def run_workflow():
    """
    Run the user-defined workflow definition.
    """
    train_example_meta, label_example_meta, test_example_meta, test_output_example_meta, train_model_meta = prepare_workflow(
    )

    python_job_config_0 = BaseJobConfig(job_name='read_train',
                                        platform='local',
                                        engine='python')

    python_job_config_1 = BaseJobConfig(job_name='train',
                                        platform='local',
                                        engine='python')

    flink_job_config_2 = LocalFlinkJobConfig()
    flink_job_config_2.job_name = 'test'
    flink_job_config_2.local_mode = 'python'
    flink_job_config_2.flink_home = os.environ['FLINK_HOME']
    flink_job_config_2.set_table_env_create_func(MyStreamTableEnvCreator())

    with af.config(python_job_config_0):
        python_job_0_read_train_data = af.read_example(
            example_info=train_example_meta,
            executor=PythonObjectExecutor(python_object=ReadTrainCsvExample()))

        python_job_0_read_label_data = af.read_example(
            example_info=label_example_meta,
            executor=PythonObjectExecutor(python_object=ReadLabelCsvExample()))

        write_train_data_example = af.register_example(
            name='write_train_data',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_type='pandas',
            data_format='csv',
            batch_uri='/tmp/write_train_data.csv')

        python_job_0_write_train_result = af.write_example(
            input_data=python_job_0_read_train_data,
            example_info=write_train_data_example,
            executor=PythonObjectExecutor(
                python_object=WriteTrainCsvExample()))

    with af.config(python_job_config_1):
        python_job_1_train_model = af.train(
            name='trainer_0',
            input_data_list=[
                python_job_0_read_train_data, python_job_0_read_label_data
            ],
            executor=PythonObjectExecutor(python_object=TrainModel()),
            model_info=train_model_meta)

    with af.config(flink_job_config_2):
        flink_job_2_read_test_data = af.read_example(
            example_info=test_example_meta,
            executor=FlinkPythonExecutor(python_object=ReadTestCsvExample()))

        flink_job_2_predict_test_data = af.transform(
            input_data_list=[flink_job_2_read_test_data],
            executor=FlinkPythonExecutor(
                python_object=PredictTestLabelExecutor()))

        write_result = af.write_example(
            input_data=flink_job_2_predict_test_data,
            example_info=test_output_example_meta,
            executor=FlinkPythonExecutor(
                python_object=WritePredictTestExample()))

    af.stop_before_control_dependency(python_job_1_train_model,
                                      python_job_0_write_train_result)
    af.stop_before_control_dependency(write_result, python_job_1_train_model)
    workflow_id = af.run(get_project_path() + '/')
    res = af.wait_workflow_execution_finished(workflow_id)
    sys.exit(res)
Example #24
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        with af.config('train_job'):
            train_example = af.register_example(name=artifact_prefix + 'train_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(example_info=train_example,
                                                 executor=PythonObjectExecutor(python_object=TrainExampleReader()))
            train_transform = af.transform(input_data_list=[train_read_example],
                                           executor=PythonObjectExecutor(python_object=TrainExampleTransformer()))
            train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression',
                                            model_type=ModelType.SAVED_MODEL,
                                            model_desc='logistic regression model')
            train_channel = af.train(input_data_list=[train_transform],
                                     executor=PythonObjectExecutor(python_object=ModelTrainer()),
                                     model_info=train_model)
        with af.config('validate_job'):
            validate_example = af.register_example(name=artifact_prefix + 'validate_example',
                                                   support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                   stream_uri=EXAMPLE_URI.format('evaluate'),
                                                   data_format='npz')
            validate_read_example = af.read_example(example_info=validate_example,
                                                    executor=PythonObjectExecutor(
                                                        python_object=ValidateExampleReader()))
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(name=validate_artifact_name,
                                                     stream_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(input_data_list=[validate_transform],
                                                 model_info=train_model,
                                                 executor=PythonObjectExecutor(
                                                     python_object=ModelValidator(validate_artifact_name)),
                                                 )
        with af.config('push_job'):
            # Push model to serving
            # Register metadata of pushed model
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(name=push_model_artifact_name,
                                                       stream_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(model_info=train_model,
                                         executor=PythonObjectExecutor(
                                             python_object=ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            predict_example = af.register_example(name=artifact_prefix + 'predict_example',
                                                  support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                  stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(example_info=predict_example,
                                                   executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(input_data_list=[predict_transform],
                                         model_info=train_model,
                                         executor=PythonObjectExecutor(python_object=ModelPredictor()))

            write_example = af.register_example(name=artifact_prefix + 'export_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(input_data=predict_channel,
                             example_info=write_example,
                             executor=PythonObjectExecutor(python_object=ExampleWriter()))

        af.model_version_control_dependency(src=validate_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
                                            dependency=validate_trigger, model_name=train_model.name)

        af.model_version_control_dependency(src=push_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
                                            dependency=push_trigger, model_name=train_model.name)
    # Run workflow
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
Example #25
def run_workflow(root_dir_path, project_yaml_path):
    ai_flow.set_project_config_file(project_yaml_path)
    res = ai_flow.run(root_dir_path,
                      dag_id='hello_world_example',
                      scheduler_type=ai_flow.SchedulerType.AIFLOW)
    ai_flow.wait_workflow_execution_finished(res)
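A typical invocation of this last example, assuming the project layout used by the other examples (a project.yaml at the project root):

import os

if __name__ == '__main__':
    root = os.path.dirname(os.path.abspath(__file__))
    run_workflow(root_dir_path=root, project_yaml_path=root + '/project.yaml')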