Example #1
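The snippets on this page come from AI Flow / flink_ai_flow example and test code and omit their imports. A minimal sketch of the imports they assume is shown below; the af and faf aliases follow the upstream examples, while the exact module paths of the project-specific helpers are assumptions and may differ between versions.

# Assumed imports for the examples below (best-effort; adjust to your AI Flow version).
import os
import time

import ai_flow as af            # core API: create_example, transform, run, compile_workflow, ...
import flink_ai_flow as faf     # Flink executors and LocalFlinkJobConfig
# Project-specific helpers such as ExecuteProperties/ExecuteArgs, Properties,
# test_util, get_file_dir/get_parent_dir, json_utils, and the Source/Transformer/
# Sink/TableEnvCreator classes are assumed to be importable from the surrounding
# test package.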
def run_flink_job():
    input_file = "/test1.csv"
    output_file ="/output_test1.csv"
    if os.path.exists(output_file):
        os.remove(output_file)

    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")

    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    # flink_home must point at a local Flink installation; adjust this path for your environment.
    flink_config.flink_home = "/Users/chenwuchao/soft/apache/flink-1.10.0"
    with af.config(flink_config):
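        # Describe the input and output tables with Flink SQL DDL; these properties feed the read/write calls below.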
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a STRING, b STRING, c STRING) WITH ('connector' = 'filesystem',
                'path' = 'INPUT',
                'format' = 'csv'
                )"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name

        stream_args_1 = batch_args_1

        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa STRING, bb STRING) WITH ('connector' = 'filesystem',
                'path' = 'OUTPUT',
                'format' = 'csv'
                )"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteProperties(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1)
                                        )
        processed = af.transform(input_data_list=[input_example],
                                 executor=faf.FlinkJavaExecutor(
                                     java_class="com.apache.flink.ai.flow.TestTransformer"))

        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteProperties(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2)
                         )

    workflow = af.compile_workflow(test_util.get_project_path())
    print(json_utils.dumps(list(workflow.jobs.values())[0]))

Example #2
    def test_run_pyflink_job(self):
        project_path = os.path.dirname(__file__) + '/../'
        af.set_project_config_file(project_path + "project.yaml")
        input_file = get_parent_dir(
            get_file_dir(__file__)) + '/resources/word_count.txt'
        output_file = get_file_dir(__file__) + "/word_count_output.csv"
        if os.path.exists(output_file):
            os.remove(output_file)

        example_1 = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=input_file,
            stream_uri=input_file,
            data_format="csv")

        example_2 = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=output_file,
            stream_uri=output_file,
            data_format="csv")
        flink_config = faf.LocalFlinkJobConfig()
        flink_config.local_mode = 'cluster'
        flink_config.flink_home = '/Users/chenwuchao/soft/apache/flink-1.11.0/'
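        # TableEnvCreator (defined elsewhere in the test project) controls how the Flink table environment is created.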
        flink_config.set_table_env_create_func(TableEnvCreator())
        with af.config(flink_config):
            input_example = af.read_example(
                example_info=example_1,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))
            processed = af.transform(
                input_data_list=[input_example],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))

            af.write_example(input_data=processed,
                             example_info=example_2,
                             executor=faf.flink_executor.FlinkPythonExecutor(
                                 python_object=Sink()))
        workflow_id = af.run(project_path)
        res = af.wait_workflow_execution_finished(workflow_id)
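
The pyflink examples above and below reference Source, Transformer, Sink, and TableEnvCreator objects that live elsewhere in the test project. As a rough sketch only, such executors could be shaped like the following; the Executor base class, FlinkFunctionContext, and the execute signature are assumptions modelled on the flink_ai_flow examples rather than a verified API, and the bodies build a tiny in-memory word count instead of reading word_count.txt.

# Illustrative sketch only: the import path, base class, and context API below
# are assumptions based on flink_ai_flow examples and may not match your version.
from typing import List

from pyflink.table import Table
from flink_ai_flow.pyflink.user_define_executor import Executor, FlinkFunctionContext


class Source(Executor):
    def execute(self, function_context: FlinkFunctionContext, input_list: List[Table]) -> List[Table]:
        # The real test reads word_count.txt via example_1's URI; a tiny
        # in-memory table stands in for it here.
        t_env = function_context.get_table_env()  # assumed accessor
        return [t_env.from_elements([('hello',), ('world',), ('hello',)], ['word'])]


class Transformer(Executor):
    def execute(self, function_context: FlinkFunctionContext, input_list: List[Table]) -> List[Table]:
        # Classic word count over the single upstream table.
        return [input_list[0].group_by('word').select('word, word.count as cnt')]


class Sink(Executor):
    def execute(self, function_context: FlinkFunctionContext, input_list: List[Table]) -> List[Table]:
        # The real test writes to word_count_output.csv through a filesystem
        # table; printing the result stands in for that here.
        input_list[0].execute().print()
        return []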

Example #3
    def test_run_pyflink_job(self):
        input_file = get_parent_dir(
            get_file_dir(__file__)) + '/resources/word_count.txt'
        output_file = get_file_dir(__file__) + "/word_count_output.csv"
        if os.path.exists(output_file):
            os.remove(output_file)

        example_1 = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=input_file,
            stream_uri=input_file,
            data_format="csv")

        example_2 = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=output_file,
            stream_uri=output_file,
            data_format="csv")
        flink_config = faf.LocalFlinkJobConfig()
        flink_config.local_mode = 'python'
        flink_config.set_table_env_create_func(TableEnvCreator())
        with af.config(flink_config):
            input_example = af.read_example(
                example_info=example_1,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))
            processed = af.transform(
                input_data_list=[input_example],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))

            af.write_example(input_data=processed,
                             example_info=example_2,
                             executor=faf.flink_executor.FlinkPythonExecutor(
                                 python_object=Sink()))
        workflow_id = af.run(test_util.get_project_path())
        res = af.wait_workflow_execution_finished(workflow_id)
        self.assertEqual(0, res)

Example #4
def run_flink_predict_job():
    input_file = "/test1.csv"
    output_file = "/output_test2.csv"
    example_1 = af.create_example(
        name="example_1",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=input_file,
        stream_uri=input_file,
        data_format="csv")

    example_2 = af.create_example(
        name="example_2",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=output_file,
        stream_uri=output_file,
        data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = ''
    with af.config(flink_config):
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a INT, b INT, c INT) WITH ('connector' = 'filesystem',
                        'path' = 'INPUT',
                        'format' = 'csv'
                        )"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name

        stream_args_1 = batch_args_1

        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa INT, cc INT) WITH ('connector' = 'filesystem',
                        'path' = 'OUTPUT',
                        'format' = 'csv'
                        )"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteArgs(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1))
        model_meta = af.ModelMeta(name="test", model_type="saved_model")
        model_version = af.ModelVersionMeta(version="11111",
                                            model_path="./tmp/saved_model/",
                                            model_metric="./tmp/saved_model/",
                                            model_id=0)
        processed = af.predict(
            input_data_list=[input_example],
            model_info=model_meta,
            model_version_info=model_version,
            executor=faf.flink_executor.FlinkJavaExecutor(
                java_class="com.apache.flink.ai.flow.TestPredict"))

        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteArgs(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2))

    g = af.default_graph()
    workflow = af.compile_workflow(project_path=test_util.get_project_path())
    print(dumps(list(workflow.jobs.values())[0]))

Example #5
    def test_run_local_flink_job(self):
        project_name = 'test_project'
        workflow_name = 'test_workflow'
        dag_id = '{}.{}'.format(project_name, workflow_name)

        input_file = project_path() + '/resources/word_count.txt'
        output_file = project_path() + '/resources/word_count_output.csv'
        if os.path.exists(output_file):
            os.remove(output_file)

        example_1 = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=input_file,
            stream_uri=input_file,
            data_format="csv")

        example_2 = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=output_file,
            stream_uri=output_file,
            data_format="csv")
        if os.environ.get('test_run_mode') == 'cluster':
            task_config = 'task_4'
        else:
            task_config = 'task_3'

        def run_task_function(client: NotificationClient):
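            # Build and submit the workflow inside this notification callback,
            # then poll Airflow's DagRun state until the execution finishes.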
            with af.global_config_file(workflow_config_file()):
                with af.config(task_config):
                    input_example = af.read_example(
                        example_info=example_1,
                        executor=faf.flink_executor.FlinkPythonExecutor(
                            python_object=Source()))
                    processed = af.transform(
                        input_data_list=[input_example],
                        executor=faf.flink_executor.FlinkPythonExecutor(
                            python_object=Transformer()))

                    af.write_example(
                        input_data=processed,
                        example_info=example_2,
                        executor=faf.flink_executor.FlinkPythonExecutor(
                            python_object=Sink()))
                workflow_info = af.workflow_operation.submit_workflow(
                    workflow_name)

            af.workflow_operation.start_new_workflow_execution(workflow_name)
            while True:
                with create_session() as session:
                    dag_run = session.query(DagRun).filter(
                        DagRun.dag_id == 'test_project.test_workflow').first()
                    if dag_run is not None and dag_run.state in State.finished:
                        break
                    else:
                        time.sleep(1)

        self.run_ai_flow(dag_id, run_task_function)
        with create_session() as session:
            tes = session.query(TaskExecution).filter(
                TaskExecution.dag_id == 'test_project.test_workflow',
                TaskExecution.task_id == task_config).all()
            self.assertEqual(1, len(tes))