Python register_modelの例、ai_flow.register_model Pythonの例

コード例 #1

0

ファイルを表示

    def test_batch_train_component_with_an_output(self):
        """Run a local batch-training job whose training output is written out.

        Registers the input example, the ``mnist_model`` model and an output
        example stored as ``numpy_output.npz`` next to this test file, builds
        a read -> train -> write job, runs the project and asserts that
        ``af.wait_workflow_execution_finished`` returns 0.
        """
        input_example_meta = af.register_example(
            name='batch_train_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH)
        model_meta = af.register_model(model_name='mnist_model',
                                       model_type=ModelType.SAVED_MODEL)

        # Join the path components instead of concatenating strings: when
        # ``os.path.dirname(__file__)`` is empty (module referenced by a bare
        # relative file name), ``dirname + '/numpy_output.npz'`` resolved to
        # the filesystem root instead of the directory of this file.
        output_file = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'numpy_output.npz'))
        example_meta = af.register_example(
            name='output_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_type='numpy',
            data_format='npz',
            batch_uri=output_file)
        with af.config(
                af.BaseJobConfig(platform='local',
                                 engine='python',
                                 job_name='batch_train')):
            input_example = af.read_example(
                example_info=input_example_meta,
                executor=PythonObjectExecutor(
                    python_object=ReadBatchExample()))
            # output_num=1 so the training node exposes a channel that can be
            # handed to write_example below.
            train_channel = af.train(
                input_data_list=[input_example],
                executor=PythonObjectExecutor(
                    python_object=TrainBatchMnistModelWithOutput()),
                model_info=model_meta,
                output_num=1)
            af.write_example(input_data=train_channel,
                             example_info=example_meta)
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)

コード例 #2

0

ファイルを表示

 def test_stream_train_component(self):
     """Build batch and stream MNIST training nodes in one local job and run it.

     Both example registrations use the name ``stream_train_example``. After
     the job is defined, ``af.stop_before_control_dependency(stream_train,
     batch_train)`` is declared, the project is run and the result of
     ``af.wait_workflow_execution_finished`` is expected to be 0.
     """
     both_modes = ExampleSupportType.EXAMPLE_BOTH
     batch_meta = af.register_example(name='stream_train_example',
                                      support_type=both_modes)
     mnist_model = af.register_model(model_name='mnist_model',
                                     model_type=ModelType.SAVED_MODEL)
     stream_meta = af.register_example(name='stream_train_example',
                                       support_type=both_modes)

     job_config = af.BaseJobConfig(platform='local',
                                   engine='python',
                                   job_name='stream_train')
     with af.config(job_config):
         # Batch branch: read -> train.
         batch_reader = PythonObjectExecutor(python_object=ReadBatchExample())
         batch_source = af.read_example(example_info=batch_meta,
                                        executor=batch_reader)
         batch_trainer = PythonObjectExecutor(
             python_object=TrainBatchMnistModel())
         batch_train = af.train(input_data_list=[batch_source],
                                executor=batch_trainer,
                                model_info=mnist_model)

         # Stream branch: read -> train on the same model metadata.
         stream_reader = PythonObjectExecutor(
             python_object=ReadStreamExample())
         stream_source = af.read_example(example_info=stream_meta,
                                         executor=stream_reader)
         stream_trainer = PythonObjectExecutor(
             python_object=TrainStreamMnistModel())
         stream_train = af.train(input_data_list=[stream_source],
                                 executor=stream_trainer,
                                 model_info=mnist_model)

     af.stop_before_control_dependency(stream_train, batch_train)
     execution_id = af.run(test_util.get_project_path())
     exit_code = af.wait_workflow_execution_finished(execution_id)
     self.assertEqual(0, exit_code)

コード例 #3

0

ファイルを表示

def prepare_workflow():
    """Prepare workflow: Example & Model Metadata registration.

    Registers three batch CSV examples (``train_data``, ``label_data``,
    ``test_data``) located in the data-set directory returned by
    ``DataSets().collect_data_file_dir()``, one Kafka stream example
    (``test_output_data``) and the ``label_model`` model.

    Returns:
        A 5-tuple ``(train_example_meta, label_example_meta,
        test_example_meta, test_output_example_meta, train_model_meta)``.
    """
    # The second element (output directory) is not needed for registration.
    data_set_dir, output_dir = DataSets().collect_data_file_dir()
    train_example_meta: ExampleMeta = af.register_example(
        name='train_data',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='pandas',
        data_format='csv',
        batch_uri=data_set_dir + '/train_data.csv')
    label_example_meta: ExampleMeta = af.register_example(
        name='label_data',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='pandas',
        data_format='csv',
        batch_uri=data_set_dir + '/label_data.csv')
    test_example_meta: ExampleMeta = af.register_example(
        name='test_data',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='pandas',
        data_format='csv',
        batch_uri=data_set_dir + '/test_data.csv')
    test_output_example_meta: ExampleMeta = af.register_example(
        name='test_output_data',
        support_type=ExampleSupportType.EXAMPLE_STREAM,
        data_type='kafka',
        data_format='csv',
        stream_uri='localhost:9092')
    train_model_meta: ModelMeta = af.register_model(
        model_name='label_model', model_type=ModelType.SAVED_MODEL)
    return (train_example_meta, label_example_meta, test_example_meta,
            test_output_example_meta, train_model_meta)

コード例 #4

0

ファイルを表示

    def test_run_model_version_job(self):
        """Run a two-task workflow linked by a model-version control dependency.

        ``task_3`` is declared with a MODEL_GENERATED dependency on ``task_2``
        for model ``model_1``. While the workflow runs, a GENERATED model
        version is registered once exactly one ``TaskExecution`` row exists
        for ``task_2``. After the DAG run has finished, the test asserts that
        there is exactly one ``TaskExecution`` row for ``task_2``.
        """
        project_name = 'test_project'
        workflow_name = 'test_workflow'
        # Single source of truth for the DAG id used by every query below.
        dag_id = '{}.{}'.format(project_name, workflow_name)
        af.register_model(model_name='model_1',
                          model_type=af.ModelType.SAVED_MODEL,
                          model_desc='test model')

        def run_task_function(client: NotificationClient):
            # NOTE(review): ``client`` is not used in this body; presumably
            # it is part of the callback signature expected by run_ai_flow.
            with af.global_config_file(workflow_config_file()):
                with af.config('task_2'):
                    executor_1 = af.user_define_operation(
                        af.PythonObjectExecutor(SimpleExecutor()))
                with af.config('task_3'):
                    executor_2 = af.user_define_operation(
                        af.PythonObjectExecutor(SimpleExecutor()))
                af.model_version_control_dependency(
                    src=executor_2,
                    dependency=executor_1,
                    model_name='model_1',
                    model_version_event_type=ModelVersionEventType.
                    MODEL_GENERATED)
                af.workflow_operation.submit_workflow(workflow_name)

            af.workflow_operation.start_new_workflow_execution(workflow_name)
            # Guards the one-time registration of the model version.
            version_pending = True
            while True:
                with create_session() as session:
                    task_2_runs = session.query(TaskExecution).filter(
                        TaskExecution.dag_id == dag_id,
                        TaskExecution.task_id == 'task_2').all()
                    if len(task_2_runs) == 1 and version_pending:
                        af.register_model_version(
                            model='model_1',
                            model_path='/tmp/model/v1',
                            current_stage=af.ModelVersionStage.GENERATED)
                        version_pending = False

                    dag_run = session.query(DagRun).filter(
                        DagRun.dag_id == dag_id).first()
                    if dag_run is not None and dag_run.state in State.finished:
                        break
                    # Poll once per second until the DAG run has finished.
                    time.sleep(1)

        self.run_ai_flow(dag_id, run_task_function)
        with create_session() as session:
            tes = session.query(TaskExecution).filter(
                TaskExecution.dag_id == dag_id,
                TaskExecution.task_id == 'task_2').all()
            self.assertEqual(1, len(tes))

コード例 #5

0

ファイルを表示

ファイル: test_python_evaluate_component.py プロジェクト: zuiwanting/flink-ai-extended

    def test_stream_evaluate_component(self):
        """Train in batch, evaluate on a stream and write the evaluation result.

        The evaluation output goes to ``<dir of this file>/stream_evaluate``,
        which is removed first if it already exists. The workflow result is
        expected to be 0.
        """
        stream_mode = ExampleSupportType.EXAMPLE_STREAM
        train_meta = af.register_example(
            name='batch_train_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH)
        mnist_model = af.register_model(model_name='mnist_model',
                                        model_type=ModelType.SAVED_MODEL)
        evaluate_meta = af.register_example(name='stream_evaluate_example',
                                            support_type=stream_mode)

        result_path = get_file_dir(__file__) + '/stream_evaluate'
        # The artifact is registered for its metadata only; the returned
        # object is not used further in this test.
        _evaluate_artifact = af.register_artifact(name='stream_evaluate',
                                                  stream_uri=result_path)
        result_meta = af.register_example(
            name='stream_evaluate_result_example',
            support_type=stream_mode,
            stream_uri=result_path)

        # Remove any output file that already exists at the target path.
        if os.path.exists(result_path):
            os.remove(result_path)

        job_config = af.BaseJobConfig(platform='local',
                                      engine='python',
                                      job_name='stream_evaluate')
        with af.config(job_config):
            batch_reader = PythonObjectExecutor(
                python_object=ReadBatchExample())
            train_source = af.read_example(example_info=train_meta,
                                           executor=batch_reader)

            trainer = PythonObjectExecutor(
                python_object=TrainBatchMnistModel())
            batch_train = af.train(input_data_list=[train_source],
                                   executor=trainer,
                                   model_info=mnist_model)

            stream_reader = PythonObjectExecutor(
                python_object=ReadStreamExample())
            evaluate_source = af.read_example(example_info=evaluate_meta,
                                              executor=stream_reader)

            stream_evaluate = af.evaluate(
                input_data_list=[evaluate_source],
                model_info=mnist_model,
                executor=PythonObjectExecutor(
                    python_object=EvaluateStreamMnistModel()),
                output_num=1)

            af.write_example(
                input_data=stream_evaluate,
                example_info=result_meta,
                executor=PythonObjectExecutor(
                    python_object=WriteStreamExample()))

        af.stop_before_control_dependency(stream_evaluate, batch_train)
        execution_id = af.run(test_util.get_project_path())
        exit_code = af.wait_workflow_execution_finished(execution_id)
        self.assertEqual(0, exit_code)

コード例 #6

0

ファイルを表示

ファイル: test_project.py プロジェクト: zuiwanting/flink-ai-extended

    def test_stream_with_external_trigger_with_model_control(self):
        """Drive a command-line job through DEPLOYED model-version registrations.

        A CHECKPOINT model is registered, a ``cmd_line`` job is given a
        ``'MODEL_DEPLOYED'`` model-version control dependency on an external
        trigger, and the flow is submitted. Two DEPLOYED model versions are
        then registered (with pauses of 1, 5 and 10 seconds around them), the
        execution is stopped and ``list_job(5, 0)`` is expected to return
        three entries.
        """
        # Print the name of the currently running test method.
        print(sys._getframe().f_code.co_name)

        model_name = 'test_create_model_version'
        response = af.register_model(model_name=model_name,
                                     model_type=af.ModelType.CHECKPOINT,
                                     model_desc='test create model version')

        trigger = af.external_trigger(name='stream_trigger')
        job_config = af.BaseJobConfig('local', 'cmd_line')
        job_config.job_name = 'test_cmd'
        with af.config(job_config):
            command = "echo 'hello world' && sleep {}".format(1)
            cmd_executor = af.user_define_operation(
                output_num=0,
                executor=CmdExecutor(cmd_line=command))
        af.model_version_control_dependency(
            src=cmd_executor,
            dependency=trigger,
            model_name=model_name,
            model_version_event_type='MODEL_DEPLOYED')
        workflow_id = af.submit_ai_flow()

        # Both registrations below use exactly the same arguments.
        version_kwargs = dict(
            model=model_name,
            model_path='fs://source1.pkl',
            model_metric='http://metric1',
            model_flavor='{"flavor.version":1}',
            version_desc='test create model version1',
            current_stage=af.ModelVersionStage.DEPLOYED)
        time.sleep(1)
        for pause_seconds in (5, 10):
            response = af.register_model_version(**version_kwargs)
            time.sleep(pause_seconds)

        af.stop_execution_by_id(workflow_id)
        jobs = af.get_ai_flow_client().list_job(5, 0)
        self.assertEqual(3, len(jobs))

コード例 #7

0

ファイルを表示

ファイル: tianchi_main.py プロジェクト: Lvnszn/flink_ai

def prepare_workflow(train_data_file: str, predict_result_directory: str,
                     merge_predict_result_path: str, first_test_data_file: str,
                     first_result_data_file: str):
    """
    Register the example and model metadata used by the workflow.

    Seven examples are registered, in this order: ``train_data``,
    ``predict_result``, ``merge_data``, ``first_test_data``,
    ``second_test_data``, ``first_result_111`` and ``second_result_111``,
    followed by the ``auto_encoder`` model. Batch examples take their URI
    from the corresponding argument; stream examples use the Kafka address
    ``localhost:9092``.

    Returns an 8-tuple with the seven example metas in registration order
    followed by the model meta.
    """
    batch_mode = ExampleSupportType.EXAMPLE_BATCH
    stream_mode = ExampleSupportType.EXAMPLE_STREAM

    train_meta: ExampleMeta = af.register_example(
        name='train_data',
        support_type=batch_mode,
        data_type='pandas',
        data_format='csv',
        batch_uri=train_data_file)
    predict_meta: ExampleMeta = af.register_example(
        name='predict_result',
        support_type=batch_mode,
        batch_uri=predict_result_directory)
    merge_meta: ExampleMeta = af.register_example(
        name='merge_data',
        support_type=batch_mode,
        batch_uri=merge_predict_result_path)
    first_test_meta: ExampleMeta = af.register_example(
        name='first_test_data',
        support_type=batch_mode,
        data_type='pandas',
        data_format='csv',
        batch_uri=first_test_data_file)
    second_test_meta: ExampleMeta = af.register_example(
        name='second_test_data',
        support_type=stream_mode,
        data_type='kafka',
        data_format='csv',
        stream_uri='localhost:9092')
    first_result_meta: ExampleMeta = af.register_example(
        name='first_result_111',
        support_type=batch_mode,
        data_type='pandas',
        data_format='csv',
        batch_uri=first_result_data_file)
    second_result_meta: ExampleMeta = af.register_example(
        name='second_result_111',
        support_type=stream_mode,
        data_type='kafka',
        data_format='csv',
        stream_uri='localhost:9092')
    model_meta: ModelMeta = af.register_model(
        model_name='auto_encoder',
        model_type=ModelType.SAVED_MODEL)

    return (train_meta, predict_meta, merge_meta,
            first_test_meta, second_test_meta,
            first_result_meta, second_result_meta, model_meta)

コード例 #8

0

ファイルを表示

 def test_batch_predict_component(self):
     """Train and predict in batch mode, writing predictions to a text file.

     Predictions are written to ``<dir of this file>/batch_predict``, which
     is removed first if it already exists. The workflow result is expected
     to be 0.
     """
     source_meta = af.register_example(
         name='input_train_example',
         support_type=ExampleSupportType.EXAMPLE_BOTH)
     mnist_model = af.register_model(model_name='mnist_model',
                                     model_type=ModelType.SAVED_MODEL)

     predict_path = get_file_dir(__file__) + '/batch_predict'
     # The artifact is registered for its metadata only; the returned object
     # is not used further in this test.
     _evaluate_artifact = af.register_artifact(name='batch_evaluate',
                                               batch_uri=predict_path)
     result_meta = af.register_example(
         name='output_result_example',
         support_type=ExampleSupportType.EXAMPLE_BATCH,
         data_type='numpy',
         data_format='txt',
         batch_uri=predict_path)

     # Remove any output file that already exists at the target path.
     if os.path.exists(predict_path):
         os.remove(predict_path)

     job_config = af.BaseJobConfig(platform='local',
                                   engine='python',
                                   job_name='batch_predict')
     with af.config(job_config):
         reader = PythonObjectExecutor(python_object=ReadBatchExample())
         batch_source = af.read_example(example_info=source_meta,
                                        executor=reader)

         trainer = PythonObjectExecutor(
             python_object=TrainBatchMnistModel())
         batch_train = af.train(input_data_list=[batch_source],
                                executor=trainer,
                                model_info=mnist_model)

         # The same input channel feeds both training and prediction.
         batch_predict = af.predict(
             input_data_list=[batch_source],
             model_info=mnist_model,
             executor=PythonObjectExecutor(
                 python_object=PredictBatchMnistModel()),
             output_num=1)
         af.write_example(input_data=batch_predict,
                          example_info=result_meta)

     af.stop_before_control_dependency(batch_predict, batch_train)
     execution_id = af.run(test_util.get_project_path())
     exit_code = af.wait_workflow_execution_finished(execution_id)
     self.assertEqual(0, exit_code)

コード例 #9

0

ファイルを表示

 def test_batch_train_component(self):
     """Run a minimal local read -> train batch job and expect result 0."""
     example_meta = af.register_example(
         name='batch_train_example',
         support_type=ExampleSupportType.EXAMPLE_BATCH)
     mnist_model = af.register_model(model_name='mnist_model',
                                     model_type=ModelType.SAVED_MODEL)

     job_config = af.BaseJobConfig(platform='local',
                                   engine='python',
                                   job_name='batch_train')
     with af.config(job_config):
         reader = PythonObjectExecutor(python_object=ReadBatchExample())
         source = af.read_example(example_info=example_meta,
                                  executor=reader)
         trainer = PythonObjectExecutor(
             python_object=TrainBatchMnistModel())
         af.train(input_data_list=[source],
                  executor=trainer,
                  model_info=mnist_model)

     execution_id = af.run(test_util.get_project_path())
     exit_code = af.wait_workflow_execution_finished(execution_id)
     self.assertEqual(0, exit_code)

コード例 #10

0

ファイルを表示

ファイル: test_python_model_validate_component.py プロジェクト: zuiwanting/flink-ai-extended

    def test_batch_model_validate(self):
        """Train in batch mode, validate the model and expect result 0.

        The validation node is created with ``output_num=0`` and is passed,
        together with the training node, to
        ``af.stop_before_control_dependency``.
        """
        example_meta = af.register_example(
            name='batch_train_example',
            support_type=ExampleSupportType.EXAMPLE_BOTH)
        mnist_model = af.register_model(model_name='mnist_model',
                                        model_type=ModelType.SAVED_MODEL)

        job_config = af.BaseJobConfig(platform='local',
                                      engine='python',
                                      job_name='evaluate')
        with af.config(job_config):
            reader = PythonObjectExecutor(python_object=ReadBatchExample())
            source = af.read_example(example_info=example_meta,
                                     executor=reader)

            trainer = PythonObjectExecutor(
                python_object=TrainBatchMnistModel())
            batch_train = af.train(input_data_list=[source],
                                   executor=trainer,
                                   model_info=mnist_model)

            # The same input channel feeds both training and validation.
            model_validate = af.model_validate(
                input_data_list=[source],
                model_info=mnist_model,
                executor=PythonObjectExecutor(
                    python_object=BatchModelValidate()),
                output_num=0)

        af.stop_before_control_dependency(model_validate, batch_train)
        execution_id = af.run(test_util.get_project_path())
        exit_code = af.wait_workflow_execution_finished(execution_id)
        self.assertEqual(0, exit_code)

コード例 #11

0

ファイルを表示

def run_project(project_root_path):
    """Define the train / validate / push / predict jobs and run them on Airflow.

    Args:
        project_root_path: Project directory; ``project.yaml`` and
            ``resources/workflow_config.yaml`` are read from beneath it.
    """
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    # Every example, model and artifact name registered below is prefixed
    # with "<project name>.".
    artifact_prefix = project_name + "."

    # External triggers used as the ``dependency`` side of the model-version
    # control dependencies declared at the end of the config block.
    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        # Train job: read stream example -> transform -> train.
        with af.config('train_job'):
            train_example = af.register_example(name=artifact_prefix + 'train_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(example_info=train_example,
                                                 executor=PythonObjectExecutor(python_object=TrainExampleReader()))
            train_transform = af.transform(input_data_list=[train_read_example],
                                           executor=PythonObjectExecutor(python_object=TrainExampleTransformer()))
            # This model metadata is reused by the validate, push and predict jobs.
            train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression',
                                            model_type=ModelType.SAVED_MODEL,
                                            model_desc='logistic regression model')
            train_channel = af.train(input_data_list=[train_transform],
                                     executor=PythonObjectExecutor(python_object=ModelTrainer()),
                                     model_info=train_model)
        # Validate job: read stream example -> transform -> validate the model.
        with af.config('validate_job'):
            validate_example = af.register_example(name=artifact_prefix + 'validate_example',
                                                   support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                   stream_uri=EXAMPLE_URI.format('evaluate'),
                                                   data_format='npz')
            validate_read_example = af.read_example(example_info=validate_example,
                                                    executor=PythonObjectExecutor(
                                                        python_object=ValidateExampleReader()))
            validate_transform = af.transform(input_data_list=[validate_read_example],
                                              executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            # The returned artifact metadata is not used afterwards; only the
            # artifact name is handed to ModelValidator.
            validate_artifact = af.register_artifact(name=validate_artifact_name,
                                                     stream_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(input_data_list=[validate_transform],
                                                 model_info=train_model,
                                                 executor=PythonObjectExecutor(
                                                     python_object=ModelValidator(validate_artifact_name)),
                                                 )
        with af.config('push_job'):
            # Push model to serving
            # Register metadata of pushed model
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            # As above, only the artifact name is passed on (to ModelPusher).
            push_model_artifact = af.register_artifact(name=push_model_artifact_name,
                                                       stream_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(model_info=train_model,
                                         executor=PythonObjectExecutor(
                                             python_object=ModelPusher(push_model_artifact_name)))

        # Predict job: read stream example -> transform -> predict -> write result.
        with af.config('predict_job'):
            predict_example = af.register_example(name=artifact_prefix + 'predict_example',
                                                  support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                  stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(example_info=predict_example,
                                                   executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(input_data_list=[predict_read_example],
                                             executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(input_data_list=[predict_transform],
                                         model_info=train_model,
                                         executor=PythonObjectExecutor(python_object=ModelPredictor()))

            write_example = af.register_example(name=artifact_prefix + 'export_example',
                                                support_type=ExampleSupportType.EXAMPLE_STREAM,
                                                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(input_data=predict_channel,
                             example_info=write_example,
                             executor=PythonObjectExecutor(python_object=ExampleWriter()))

        # Declare a MODEL_GENERATED dependency for the validation node on the
        # 'validate' trigger, for the trained model.
        af.model_version_control_dependency(src=validate_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
                                            dependency=validate_trigger, model_name=train_model.name)

        # Declare a MODEL_VALIDATED dependency for the push node on the
        # 'push' trigger, for the trained model.
        af.model_version_control_dependency(src=push_channel,
                                            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
                                            dependency=push_trigger, model_name=train_model.name)
    # Run workflow
    # The project name doubles as the Airflow DAG id.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)

コード例 #12

0

ファイルを表示

ファイル: tutorial_workflow.py プロジェクト: bgeng777/flink-ai-extended

def run_workflow():
    """Define the train, validate and predict jobs, then submit and start the workflow.

    All dataset, model and artifact names are prefixed with
    ``"<project name>."``. The ``validate`` job is registered for
    MODEL_GENERATED events and the ``predict`` job for MODEL_VALIDATED events
    of the trained model.
    """
    # Initialise the AI Flow context before anything is registered.
    af.init_ai_flow_context()

    name_prefix = af.current_project_config().get_project_name() + "."

    # --- Job 'train': read the training dataset and train the model ---
    with af.job_config('train'):
        train_meta = af.register_dataset(
            name=name_prefix + 'train_dataset',
            uri=DATASET_URI.format('train'))
        train_source = af.read_dataset(
            dataset_info=train_meta,
            read_dataset_processor=DatasetReader())

        # Model metadata shared by the validate and predict jobs below.
        train_model = af.register_model(
            model_name=name_prefix + 'KNN',
            model_desc='KNN model')
        train_channel = af.train(
            input=[train_source],
            training_processor=ModelTrainer(),
            model_info=train_model)

    # --- Job 'validate': validate the model before it is used to predict ---
    with af.job_config('validate'):
        validate_meta = af.register_dataset(
            name=name_prefix + 'validate_dataset',
            uri=DATASET_URI.format('test'))
        validate_source = af.read_dataset(
            dataset_info=validate_meta,
            read_dataset_processor=ValidateDatasetReader())

        validate_artifact_name = name_prefix + 'validate_artifact'
        validate_artifact = af.register_artifact(
            name=validate_artifact_name,
            uri=get_file_dir(__file__) + '/validate_result')
        validate_channel = af.model_validate(
            input=[validate_source],
            model_info=train_model,
            model_validation_processor=ModelValidator(validate_artifact_name))

    # --- Job 'predict': run inference and save the result ---
    with af.job_config('predict'):
        predict_meta = af.register_dataset(
            name=name_prefix + 'predict_dataset',
            uri=DATASET_URI.format('test'))
        predict_source = af.read_dataset(
            dataset_info=predict_meta,
            read_dataset_processor=Source())
        predictions = af.predict(
            input=[predict_source],
            model_info=train_model,
            prediction_processor=Predictor())

        # Prediction results are written to predict_result.csv next to this file.
        sink_meta = af.register_dataset(
            name=name_prefix + 'write_dataset',
            uri=get_file_dir(__file__) + '/predict_result.csv')
        af.write_dataset(
            input=predictions,
            dataset_info=sink_meta,
            write_dataset_processor=Sink())

    # Control edges between the jobs: train -> validate -> predict.
    af.action_on_model_version_event(
        job_name='validate',
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        model_name=train_model.name)
    af.action_on_model_version_event(
        job_name='predict',
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        model_name=train_model.name)

    # Submit the workflow definition, then start a new execution of it.
    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)

コード例 #13

0

ファイルを表示

def run_project(project_root_path):
    """Declare the train/validate/push/predict jobs and run them via Airflow.

    Args:
        project_root_path: Project root directory. ``/project.yaml`` and
            ``/resources/workflow_config.yaml`` are looked up beneath it, and
            it is handed on to ``af.deploy_to_airflow`` and ``af.run``.
    """
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()

    def scoped(suffix):
        # Registered names are namespaced as "<project name>.<suffix>".
        return project_name + "." + suffix

    def executor_for(python_object):
        # Every step wraps its user-defined object in the same executor type.
        return PythonObjectExecutor(python_object=python_object)

    trigger_for_validate = af.external_trigger(name='validate')
    trigger_for_push = af.external_trigger(name='push')

    workflow_config_path = project_root_path + '/resources/workflow_config.yaml'
    with af.global_config_file(workflow_config_path):
        # 'train_job' is configured as a periodic job: it reruns every
        # `interval` seconds, where `interval` comes from workflow_config.yaml.
        with af.config('train_job'):
            # Register the raw training data and read it.
            train_example_meta = af.register_example(
                name=scoped('train_example'),
                support_type=ExampleSupportType.EXAMPLE_BATCH,
                batch_uri=EXAMPLE_URI.format('train'))
            train_source = af.read_example(
                example_info=train_example_meta,
                executor=executor_for(ExampleReader()))

            # Preprocess the training data.
            train_features = af.transform(
                input_data_list=[train_source],
                executor=executor_for(ExampleTransformer()))

            # Register the model metadata, then train against it.
            model_meta = af.register_model(
                model_name=scoped('logistic-regression'),
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            af.train(
                input_data_list=[train_features],
                executor=executor_for(ModelTrainer()),
                model_info=model_meta)

        with af.config('validate_job'):
            # Read the 'evaluate' data and validate the model with it.
            # NOTE(review): this example is registered as EXAMPLE_STREAM but
            # is given a batch_uri -- confirm that combination is intended.
            validate_example_meta = af.register_example(
                name=scoped('validate_example'),
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                batch_uri=EXAMPLE_URI.format('evaluate'))
            validate_source = af.read_example(
                example_info=validate_example_meta,
                executor=executor_for(ValidateExampleReader()))
            validate_features = af.transform(
                input_data_list=[validate_source],
                executor=executor_for(ValidateTransformer()))

            validate_artifact_name = scoped('validate_artifact')
            af.register_artifact(
                name=validate_artifact_name,
                batch_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(
                input_data_list=[validate_features],
                model_info=model_meta,
                executor=executor_for(ModelValidator(validate_artifact_name)))

        with af.config('push_job'):
            # Register the artifact for the pushed model, then push the model.
            push_model_artifact_name = scoped('push_model_artifact')
            af.register_artifact(
                name=push_model_artifact_name,
                batch_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(
                model_info=model_meta,
                executor=executor_for(ModelPusher(push_model_artifact_name)))

        with af.config('predict_job'):
            # Read the 'predict' stream, transform it and run inference.
            predict_example_meta = af.register_example(
                name=scoped('predict_example'),
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_source = af.read_example(
                example_info=predict_example_meta,
                executor=executor_for(PredictExampleReader()))
            predict_features = af.transform(
                input_data_list=[predict_source],
                executor=executor_for(PredictTransformer()))
            predictions = af.predict(
                input_data_list=[predict_features],
                model_info=model_meta,
                executor=executor_for(ModelPredictor()))

            # Persist the prediction result.
            result_example_meta = af.register_example(
                name=scoped('write_example'),
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predictions,
                example_info=result_example_meta,
                executor=executor_for(ExampleWriter()))

        # Control edges keyed on model-version events of the trained model:
        # validation depends on MODEL_GENERATED, pushing on MODEL_VALIDATED.
        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=trigger_for_validate,
            model_name=model_meta.name)
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=trigger_for_push,
            model_name=model_meta.name)

    # Deploy and run the workflow; the project name doubles as the DAG id.
    af.deploy_to_airflow(project_root_path, dag_id=project_name)
    af.run(project_path=project_root_path,
           dag_id=project_name,
           scheduler_type=SchedulerType.AIRFLOW)

コード例 #14

0

ファイルを表示

ファイル: batch_train_stream_predict.py プロジェクト: bgeng777/flink-ai-extended

def run_workflow():
    """Define the train/validate/push/predict workflow, submit it and start it.

    All dataset, model and artifact names are prefixed with
    "<project name>.". Output locations are resolved relative to the directory
    of this file via ``get_file_dir(__file__)``.
    """
    af.init_ai_flow_context()

    name_prefix = af.current_project_config().get_project_name() + "."

    def named(suffix):
        # Build a project-scoped registration name.
        return name_prefix + suffix

    # 'train' is configured as a periodic job: it reruns every `interval`
    # seconds, where `interval` is defined in workflow_config.yaml.
    with af.job_config('train'):
        # Register the raw training dataset and read it.
        train_dataset_meta = af.register_dataset(
            name=named('train_dataset'),
            uri=DATASET_URI.format('train'))
        train_source = af.read_dataset(
            dataset_info=train_dataset_meta,
            read_dataset_processor=DatasetReader())

        # Preprocess the training dataset.
        train_features = af.transform(
            input=[train_source],
            transform_processor=DatasetTransformer())

        # Register the model metadata, then train against it.
        model_meta = af.register_model(
            model_name=named('logistic-regression'),
            model_desc='logistic regression model')
        af.train(input=[train_features],
                 training_processor=ModelTrainer(),
                 model_info=model_meta)

    with af.job_config('validate'):
        # Read the 'evaluate' dataset and validate the model with it.
        validate_dataset_meta = af.register_dataset(
            name=named('validate_dataset'),
            uri=DATASET_URI.format('evaluate'))
        validate_source = af.read_dataset(
            dataset_info=validate_dataset_meta,
            read_dataset_processor=ValidateDatasetReader())
        validate_features = af.transform(
            input=[validate_source],
            transform_processor=ValidateTransformer())

        validate_artifact_name = named('validate_artifact')
        af.register_artifact(
            name=validate_artifact_name,
            uri=get_file_dir(__file__) + '/validate_result')
        af.model_validate(
            input=[validate_features],
            model_info=model_meta,
            model_validation_processor=ModelValidator(validate_artifact_name))

    with af.job_config('push'):
        # Register the artifact for the pushed model, then push the model.
        push_model_artifact_name = named('push_model_artifact')
        af.register_artifact(
            name=push_model_artifact_name,
            uri=get_file_dir(__file__) + '/pushed_model')
        af.push_model(
            model_info=model_meta,
            pushing_model_processor=ModelPusher(push_model_artifact_name))

    with af.job_config('predict'):
        # Read the 'predict' dataset, transform it and run inference.
        predict_dataset_meta = af.register_dataset(
            name=named('predict_dataset'),
            uri=DATASET_URI.format('predict'))
        predict_source = af.read_dataset(
            dataset_info=predict_dataset_meta,
            read_dataset_processor=PredictDatasetReader())
        predict_features = af.transform(
            input=[predict_source],
            transform_processor=PredictTransformer())
        predictions = af.predict(
            input=[predict_features],
            model_info=model_meta,
            prediction_processor=ModelPredictor())

        # Persist the prediction result.
        result_dataset_meta = af.register_dataset(
            name=named('write_dataset'),
            uri=get_file_dir(__file__) + '/predict_result')
        af.write_dataset(input=predictions,
                         dataset_info=result_dataset_meta,
                         write_dataset_processor=DatasetWriter())

    # Control edges keyed on model-version events of the trained model:
    # 'validate' acts on MODEL_GENERATED and 'push' acts on MODEL_VALIDATED.
    # NOTE(review): no control edge is declared for 'predict' in this
    # function; its start condition presumably comes from the workflow
    # configuration -- confirm.
    event_actions = (
        ('validate', ModelVersionEventType.MODEL_GENERATED),
        ('push', ModelVersionEventType.MODEL_VALIDATED),
    )
    for job_name, event_type in event_actions:
        af.action_on_model_version_event(
            job_name=job_name,
            model_version_event_type=event_type,
            model_name=model_meta.name)

    # Submit the workflow definition, then start a new execution of it.
    workflow_ops = af.workflow_operation
    workflow_ops.submit_workflow(
        af.current_workflow_config().workflow_name)
    workflow_ops.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)

コード例 #15

0

ファイルを表示

def run_workflow():
    """Declare four jobs (train, validate, push, predict) and launch them.

    Registration names are built as "<project name>.<suffix>". After the jobs
    and the model-version event actions are declared, the workflow is
    submitted and a new execution is started.
    """
    af.init_ai_flow_context()

    project_prefix = af.current_project_config().get_project_name() + "."

    with af.job_config('train'):
        # Register and read the training dataset, preprocess it, then train
        # the registered model on the transformed data.
        training_data = af.register_dataset(
            name=project_prefix + 'train_dataset',
            uri=DATASET_URI.format('train'))
        training_input = af.read_dataset(
            dataset_info=training_data,
            read_dataset_processor=TrainDatasetReader())
        training_features = af.transform(
            input=[training_input],
            transform_processor=TrainDatasetTransformer())
        registered_model = af.register_model(
            model_name=project_prefix + 'logistic-regression',
            model_desc='logistic regression model')
        af.train(
            input=[training_features],
            training_processor=ModelTrainer(),
            model_info=registered_model)

    with af.job_config('validate'):
        # Validate the model against the 'evaluate' dataset.
        validation_data = af.register_dataset(
            name=project_prefix + 'validate_dataset',
            uri=DATASET_URI.format('evaluate'))
        validation_input = af.read_dataset(
            dataset_info=validation_data,
            read_dataset_processor=ValidateDatasetReader())
        validation_features = af.transform(
            input=[validation_input],
            transform_processor=ValidateTransformer())
        validate_artifact_name = project_prefix + 'validate_artifact'
        validate_result_uri = get_file_dir(__file__) + '/validate_result'
        af.register_artifact(name=validate_artifact_name,
                             uri=validate_result_uri)
        af.model_validate(
            input=[validation_features],
            model_info=registered_model,
            model_validation_processor=ModelValidator(validate_artifact_name))

    with af.job_config('push'):
        # Register the pushed-model artifact, then push the model.
        push_model_artifact_name = project_prefix + 'push_model_artifact'
        pushed_model_uri = get_file_dir(__file__) + '/pushed_model'
        af.register_artifact(name=push_model_artifact_name,
                             uri=pushed_model_uri)
        af.push_model(
            model_info=registered_model,
            pushing_model_processor=ModelPusher(push_model_artifact_name))

    with af.job_config('predict'):
        # Run inference on the 'predict' dataset and write the result out.
        prediction_data = af.register_dataset(
            name=project_prefix + 'predict_dataset',
            uri=DATASET_URI.format('predict'))
        prediction_input = af.read_dataset(
            dataset_info=prediction_data,
            read_dataset_processor=PredictDatasetReader())
        prediction_features = af.transform(
            input=[prediction_input],
            transform_processor=PredictTransformer())
        prediction_output = af.predict(
            input=[prediction_features],
            model_info=registered_model,
            prediction_processor=ModelPredictor())
        export_data = af.register_dataset(
            name=project_prefix + 'export_dataset',
            uri=get_file_dir(__file__) + '/predict_result')
        af.write_dataset(
            input=prediction_output,
            dataset_info=export_data,
            write_dataset_processor=DatasetWriter())

    # 'validate' acts on MODEL_GENERATED events of the trained model and
    # 'push' acts on MODEL_VALIDATED events of the same model.
    trained_model_name = registered_model.name
    af.action_on_model_version_event(
        job_name='validate',
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        model_name=trained_model_name)
    af.action_on_model_version_event(
        job_name='push',
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        model_name=trained_model_name)

    # Submit the workflow, then start a new execution of it.
    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)

コード例 #16

0

ファイルを表示

def run_workflow():
    """Define a five-job workflow chained by job status, then submit and start it.

    Jobs declared: 'train', 'evaluate', 'validate', 'push' and 'predict'.
    Each job after 'train' is wired to act on the status of the job before
    it, giving the chain train -> evaluate -> validate -> push -> predict.
    All registration names are prefixed with "<project name>.".
    """
    af.init_ai_flow_context()
    artifact_prefix = af.current_project_config().get_project_name() + "."

    with af.job_config('train'):
        # Register the raw training dataset and read it.
        train_dataset = af.register_dataset(
            name=artifact_prefix + 'train_dataset',
            uri=DATASET_URI.format('train'))
        train_read_dataset = af.read_dataset(
            dataset_info=train_dataset,
            read_dataset_processor=DatasetReader())

        # Preprocess the training dataset.
        train_transform = af.transform(
            input=[train_read_dataset],
            transform_processor=DatasetTransformer())

        # Register the model metadata, then train against it.
        train_model = af.register_model(
            model_name=artifact_prefix + 'logistic-regression',
            model_desc='logistic regression model')
        af.train(input=[train_transform],
                 training_processor=ModelTrainer(),
                 model_info=train_model)

    with af.job_config('evaluate'):
        # Read and preprocess the 'evaluate' dataset.
        evaluate_dataset = af.register_dataset(
            name=artifact_prefix + 'evaluate_dataset',
            uri=DATASET_URI.format('evaluate'))
        evaluate_read_dataset = af.read_dataset(
            dataset_info=evaluate_dataset,
            read_dataset_processor=EvaluateDatasetReader())
        evaluate_transform = af.transform(
            input=[evaluate_read_dataset],
            transform_processor=EvaluateTransformer())

        # Register the location used to save the evaluation result.
        evaluate_artifact_name = artifact_prefix + 'evaluate_artifact'
        af.register_artifact(
            name=evaluate_artifact_name,
            uri=get_file_dir(__file__) + '/evaluate_result')

        # Evaluate the model.
        af.evaluate(
            input=[evaluate_transform],
            model_info=train_model,
            evaluation_processor=ModelEvaluator(evaluate_artifact_name))

    with af.job_config('validate'):
        # Validate the model; this job also reads the 'evaluate' dataset URI.
        validate_dataset = af.register_dataset(
            name=artifact_prefix + 'validate_dataset',
            uri=DATASET_URI.format('evaluate'))
        validate_read_dataset = af.read_dataset(
            dataset_info=validate_dataset,
            read_dataset_processor=ValidateDatasetReader())
        validate_transform = af.transform(
            input=[validate_read_dataset],
            transform_processor=ValidateTransformer())
        validate_artifact_name = artifact_prefix + 'validate_artifact'
        af.register_artifact(
            name=validate_artifact_name,
            uri=get_file_dir(__file__) + '/validate_result')
        af.model_validate(
            input=[validate_transform],
            model_info=train_model,
            model_validation_processor=ModelValidator(validate_artifact_name))

    with af.job_config('push'):
        # Register the artifact for the pushed model, then push the model.
        push_model_artifact_name = artifact_prefix + 'push_model_artifact'
        af.register_artifact(
            name=push_model_artifact_name,
            uri=get_file_dir(__file__) + '/pushed_model')
        af.push_model(
            model_info=train_model,
            pushing_model_processor=ModelPusher(push_model_artifact_name))

    with af.job_config('predict'):
        # Read the 'predict' dataset, transform it and run inference.
        predict_dataset = af.register_dataset(
            name=artifact_prefix + 'predict_dataset',
            uri=DATASET_URI.format('predict'))
        predict_read_dataset = af.read_dataset(
            dataset_info=predict_dataset,
            read_dataset_processor=PredictDatasetReader())
        predict_transform = af.transform(
            input=[predict_read_dataset],
            transform_processor=PredictTransformer())
        predict_channel = af.predict(
            input=[predict_transform],
            model_info=train_model,
            prediction_processor=ModelPredictor())

        # Persist the prediction result.
        write_dataset = af.register_dataset(
            name=artifact_prefix + 'write_dataset',
            uri=get_file_dir(__file__) + '/predict_result')
        af.write_dataset(input=predict_channel,
                         dataset_info=write_dataset,
                         write_dataset_processor=DatasetWriter())

    # Control edges: train -> evaluate -> validate -> push -> predict.
    # These describe the workflow as a whole rather than the 'predict' job,
    # so they are declared at function level, outside every job_config scope.
    af.action_on_job_status('evaluate', 'train')
    af.action_on_job_status('validate', 'evaluate')
    af.action_on_job_status('push', 'validate')
    af.action_on_job_status('predict', 'push')

    # Submit the workflow definition, then start a new execution of it.
    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)