def execute(self, function_context: FlinkFunctionContext,
             input_table: Table) -> None:
     example_meta: ExampleMeta = function_context.node_spec.example_meta
     table_env: TableEnvironment = function_context.get_table_env()
     statement_set = function_context.get_statement_set()
     table_env.execute_sql("""
            create table write_predict_test_table (
                 face_id varchar,
                 label varchar
             ) with (
                 'connector' = 'kafka',
                 'topic' = 'tianchi_write_example',
                 'properties.bootstrap.servers' = '{}',
                 'properties.group.id' = 'write_example',
                 'properties.request.timeout.ms' = '30000',
                 'format' = 'csv',
                 'scan.startup.mode' = 'earliest-offset',
                 'csv.disable-quote-character' = 'true'
             )
             """.format(example_meta.stream_uri))
     # table_env.execute_sql("""
     #        create table write_predict_test_table (
     #             face_id varchar,
     #             label varchar
     #         ) with (
     #             'connector' = 'blackhole'
     #         )
     #         """)
     statement_set.add_insert('write_predict_test_table', input_table)
Example #2
 def execute(self, function_context: FlinkFunctionContext) -> Table:
     table_env: TableEnvironment = function_context.get_table_env()
     ddl = """create table test_table (
             face_id varchar,
             feature_data varchar
             ) with (
                     'connector.type' = 'filesystem',
                     'format.type' = 'csv',
                     'connector.path' = '{}',
                     'format.field-delimiter' = ';'
                 )""".format(function_context.get_example_meta().batch_uri)
     table_env.execute_sql(ddl)
     return table_env.from_path('test_table')
Example #3
 def execute(self, function_context: FlinkFunctionContext) -> Table:
     table_env: TableEnvironment = function_context.get_table_env()
     path = function_context.get_example_meta().batch_uri
     ddl = """create table training_table(
                             uuid varchar,
                             face_id varchar,
                             device_id varchar,
                             feature_data varchar
                 ) with (
                     'connector.type' = 'filesystem',
                     'format.type' = 'csv',
                     'connector.path' = '{}',
                     'format.ignore-first-line' = 'false',
                     'format.field-delimiter' = ';'
                 )""".format(path)
     table_env.execute_sql(ddl)
     return table_env.from_path('training_table')
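
Examples #2 and #3 declare their CSV sources with the legacy descriptor-style properties ('connector.type', 'format.type', ...). On Flink 1.11 or later the same table can be declared with the unified connector options; the following is a minimal sketch under that assumption, reusing the identifiers of Example #3:

def execute(self, function_context: FlinkFunctionContext) -> Table:
    table_env: TableEnvironment = function_context.get_table_env()
    path = function_context.get_example_meta().batch_uri
    # Same training_table as above, expressed with the unified connector options.
    ddl = """create table training_table (
                uuid varchar,
                face_id varchar,
                device_id varchar,
                feature_data varchar
            ) with (
                'connector' = 'filesystem',
                'path' = '{}',
                'format' = 'csv',
                'csv.field-delimiter' = ';'
            )""".format(path)
    table_env.execute_sql(ddl)
    return table_env.from_path('training_table')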
Example #4
    def execute(self, function_context: FlinkFunctionContext,
                input_table: Table) -> None:
        example_meta: ExampleMeta = function_context.get_example_meta()
        output_file = example_meta.batch_uri
        if os.path.exists(output_file):
            if os.path.isdir(output_file):
                shutil.rmtree(output_file)
            else:
                os.remove(output_file)
        t_env = function_context.get_table_env()
        statement_set = function_context.get_statement_set()
        sink = CsvTableSink(
            ['a', 'b'],
            [DataTypes.STRING(), DataTypes.STRING()], output_file, ';')

        t_env.register_table_sink('mySink', sink)
        statement_set.add_insert('mySink', input_table)
Example #5
 def execute(self, function_context: FlinkFunctionContext,
             input_table: Table) -> None:
     t_env = function_context.get_table_env()
     statement_set = function_context.get_statement_set()
     dummy_output_path = function_context.get_example_meta().batch_uri
     if os.path.exists(dummy_output_path):
         if os.path.isdir(dummy_output_path):
             shutil.rmtree(dummy_output_path)
         else:
             os.remove(dummy_output_path)
     sink = CsvTableSink(
         ['a', 'b', 'c'],
         [DataTypes.STRING(),
          DataTypes.STRING(),
          DataTypes.STRING()], dummy_output_path, ';')
     t_env.register_table_sink('mySink', sink)
     statement_set.add_insert("mySink", input_table)
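
Examples #4 and #5 write their output through CsvTableSink and register_table_sink, both of which are deprecated in recent PyFlink releases. Below is a minimal sketch of the same sink expressed as DDL plus the statement set, assuming Flink 1.11 or later; the table name csv_sink is illustrative:

def execute(self, function_context: FlinkFunctionContext,
            input_table: Table) -> None:
    t_env = function_context.get_table_env()
    statement_set = function_context.get_statement_set()
    output_path = function_context.get_example_meta().batch_uri
    # Declare the CSV sink through DDL instead of the deprecated CsvTableSink.
    t_env.execute_sql("""
        create table csv_sink (
            a varchar,
            b varchar,
            c varchar
        ) with (
            'connector' = 'filesystem',
            'path' = '{}',
            'format' = 'csv',
            'csv.field-delimiter' = ';'
        )
    """.format(output_path))
    statement_set.add_insert('csv_sink', input_table)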
Example #6
 def execute(self, function_context: FlinkFunctionContext,
             input_list: List[Table]) -> List[Table]:
     t_env = function_context.get_table_env()
     table = input_list[0]
     t_env.register_function(
         "predict",
         udf(f=PredictFunction(None),
             input_types=[DataTypes.STRING()],
             result_type=DataTypes.STRING()))
     return [table.select('face_id, predict(feature_data) as label')]
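
Example #6 builds the UDF from PredictFunction(None), whose implementation is not shown on this page. The sketch below is one way such a callable could be written; loading the model with pickle and treating the constructor argument as an optional model path are assumptions, not the original implementation:

from pyflink.table.udf import ScalarFunction
import pickle

class PredictFunction(ScalarFunction):
    """Hypothetical sketch of the scalar function wrapped by udf() above."""

    def __init__(self, model_path=None):
        self._model_path = model_path
        self._model = None

    def open(self, function_context):
        # Load the model once per worker; with PredictFunction(None) no model
        # is loaded and eval() falls back to a constant label.
        if self._model_path is not None:
            with open(self._model_path, 'rb') as f:
                self._model = pickle.load(f)

    def eval(self, feature_data):
        if self._model is None:
            return 'unknown'
        return str(self._model.predict([feature_data])[0])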
Example #7
 def execute(self, function_context: FlinkFunctionContext,
             input_list: List[Table]) -> List[Table]:
     t_env = function_context.get_table_env()
     table_0 = input_list[0]
     t_env.create_temporary_view('near_table', table_0)
     join_query = """select
     near_table.face_id, training_table.face_id
     from training_table
     inner join near_table
     on training_table.uuid=near_table.near_id"""
     return [t_env.sql_query(join_query)]
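
The join above assumes training_table is already registered in the same TableEnvironment, for example by the source DDL of Example #3 running earlier in the graph. If the training data instead arrived as a second element of input_list, a minimal sketch of the same transformer would register both views first (the two-table input is an assumption, not part of the original job):

def execute(self, function_context: FlinkFunctionContext,
            input_list: List[Table]) -> List[Table]:
    t_env = function_context.get_table_env()
    # Hypothetical variant: both tables are handed in, so register both views.
    t_env.create_temporary_view('near_table', input_list[0])
    t_env.create_temporary_view('training_table', input_list[1])
    join_query = """select
    near_table.face_id, training_table.face_id
    from training_table
    inner join near_table
    on training_table.uuid=near_table.near_id"""
    return [t_env.sql_query(join_query)]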
Example #8
 def execute(self, function_context: FlinkFunctionContext,
             input_table: Table) -> None:
     table_env: TableEnvironment = function_context.get_table_env()
     statement_set = function_context.get_statement_set()
     table_env.execute_sql("""
            create table write_example (
                 face_id varchar,
                 device_id varchar,
                 near_id int
             ) with (
                 'connector' = 'kafka',
                 'topic' = 'tianchi_write_example',
                 'properties.bootstrap.servers' = 'localhost:9092',
                 'properties.group.id' = 'write_example',
                 'format' = 'csv',
                 'scan.startup.mode' = 'earliest-offset',
                 'csv.disable-quote-character' = 'true'
             )
             """)
     statement_set.add_insert('write_example', input_table)
Example #9
 def execute(self, function_context: FlinkFunctionContext) -> Table:
     table_env: TableEnvironment = function_context.get_table_env()
     table_env.execute_sql("""
         create table online_example (
             face_id varchar,
             device_id varchar,
             feature_data varchar
         ) with (
             'connector' = 'kafka',
             'topic' = 'tianchi_read_example',
             'properties.bootstrap.servers' = 'localhost:9092',
             'properties.group.id' = 'read_example',
             'format' = 'csv',
             'scan.startup.mode' = 'earliest-offset'
         )
     """)
     table = table_env.from_path('online_example')
     # Notify AIFlow to start sending online example messages.
     update_notification('source', function_context.node_spec.instance_id)
     return table
def submit_flink_job(exec_env, t_env: TableEnvironment,
                     statement_set: StatementSet, flink_job: LocalFlinkJob,
                     graph: FlinkRunGraph):
    """Run every executor of the graph against the shared TableEnvironment,
    submit the collected inserts as one Flink job and persist its job id."""
    context = flink_job.job_context
    value_map = {}
    for i in range(len(graph.nodes)):
        node = graph.nodes[i]
        function_context: FlinkFunctionContext = FlinkFunctionContext(
            exec_env=exec_env,
            t_env=t_env,
            statement_set=statement_set,
            node_spec=node,
            job_context=context)
        c: Union[Executor, SourceExecutor,
                 SinkExecutor] = graph.executor_list[i]
        c.setup(function_context)
        if node.instance_id in graph.dependencies:
            ds = graph.dependencies[node.instance_id]
            params = []
            for d in ds:
                params.append(value_map[d.target_node_id][d.port])
            value_map[node.instance_id] = c.execute(function_context, params)
        else:
            value_map[node.instance_id] = c.execute(function_context, [])
        c.close(function_context)
    # Submit all queued inserts as one Flink job and record the job id.
    job_client = statement_set.execute().get_job_client()
    if job_client is not None:
        workflow_dir = '{}/temp/{}'.format(
            flink_job.job_config.project_path,
            str(flink_job.job_context.workflow_execution_id))
        os.makedirs(workflow_dir, exist_ok=True)
        with open('{}/{}'.format(workflow_dir, flink_job.instance_id),
                  'w') as f:
            logging.info(
                'workflow execution id: {}, job uuid: {}, Flink job id: {}'.
                format(flink_job.job_context.workflow_execution_id,
                       flink_job.instance_id, job_client.get_job_id()))
            f.write(str(job_client.get_job_id()))
        job_client.get_job_execution_result(user_class_loader=None).result()
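
submit_flink_job persists the Flink job id under {project_path}/temp/{workflow_execution_id}/{instance_id}. A minimal sketch, assuming that same path convention, of how another process could read the id back; read_flink_job_id is a hypothetical helper, not part of the original module:

import os


def read_flink_job_id(project_path: str, workflow_execution_id, instance_id) -> str:
    """Hypothetical helper: recover the job id written by submit_flink_job."""
    job_id_file = os.path.join(project_path, 'temp',
                               str(workflow_execution_id), str(instance_id))
    with open(job_id_file, 'r') as f:
        return f.read().strip()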