def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
    example_meta: ExampleMeta = function_context.node_spec.example_meta
    table_env: TableEnvironment = function_context.get_table_env()
    statement_set = function_context.get_statement_set()
    # Kafka sink for the prediction results.
    table_env.execute_sql("""
        create table write_predict_test_table (
            face_id varchar,
            label varchar
        ) with (
            'connector' = 'kafka',
            'topic' = 'tianchi_write_example',
            'properties.bootstrap.servers' = '{}',
            'properties.group.id' = 'write_example',
            'properties.request.timeout.ms' = '30000',
            'format' = 'csv',
            'scan.startup.mode' = 'earliest-offset',
            'csv.disable-quote-character' = 'true'
        )
    """.format(example_meta.stream_uri))
    # Alternative for local testing: replace the Kafka DDL above with a
    # blackhole table of the same name to discard the output.
    # table_env.execute_sql("""
    #     create table write_predict_test_table (
    #         face_id varchar,
    #         label varchar
    #     ) with (
    #         'connector' = 'blackhole'
    #     )
    # """)
    statement_set.add_insert('write_predict_test_table', input_table)
def execute(self, function_context: FlinkFunctionContext) -> Table:
    table_env: TableEnvironment = function_context.get_table_env()
    ddl = """create table test_table (
        face_id varchar,
        feature_data varchar
    ) with (
        'connector.type' = 'filesystem',
        'format.type' = 'csv',
        'connector.path' = '{}',
        'format.field-delimiter' = ';'
    )""".format(function_context.get_example_meta().batch_uri)
    table_env.execute_sql(ddl)
    return table_env.from_path('test_table')
def execute(self, function_context: FlinkFunctionContext) -> Table:
    table_env: TableEnvironment = function_context.get_table_env()
    path = function_context.get_example_meta().batch_uri
    ddl = """create table training_table (
        uuid varchar,
        face_id varchar,
        device_id varchar,
        feature_data varchar
    ) with (
        'connector.type' = 'filesystem',
        'format.type' = 'csv',
        'connector.path' = '{}',
        'format.ignore-first-line' = 'false',
        'format.field-delimiter' = ';'
    )""".format(path)
    table_env.execute_sql(ddl)
    return table_env.from_path('training_table')
def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
    example_meta: ExampleMeta = function_context.get_example_meta()
    output_file = example_meta.batch_uri
    # Remove any previous output so the CSV sink can write a fresh file.
    if os.path.exists(output_file):
        if os.path.isdir(output_file):
            shutil.rmtree(output_file)
        else:
            os.remove(output_file)
    t_env = function_context.get_table_env()
    statement_set = function_context.get_statement_set()
    sink = CsvTableSink(
        ['a', 'b'],
        [DataTypes.STRING(), DataTypes.STRING()],
        output_file,
        ';')
    t_env.register_table_sink('mySink', sink)
    statement_set.add_insert('mySink', input_table)
def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
    t_env = function_context.get_table_env()
    statement_set = function_context.get_statement_set()
    dummy_output_path = function_context.get_example_meta().batch_uri
    # Remove any previous output so the CSV sink can write a fresh file.
    if os.path.exists(dummy_output_path):
        if os.path.isdir(dummy_output_path):
            shutil.rmtree(dummy_output_path)
        else:
            os.remove(dummy_output_path)
    sink = CsvTableSink(
        ['a', 'b', 'c'],
        [DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING()],
        dummy_output_path,
        ';')
    t_env.register_table_sink('mySink', sink)
    statement_set.add_insert('mySink', input_table)
def execute(self, function_context: FlinkFunctionContext, input_list: List[Table]) -> List[Table]:
    t_env = function_context.get_table_env()
    table = input_list[0]
    # Register the model inference UDF and apply it to each feature row.
    t_env.register_function(
        'predict',
        udf(f=PredictFunction(None),
            input_types=[DataTypes.STRING()],
            result_type=DataTypes.STRING()))
    return [table.select('face_id, predict(feature_data) as label')]
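# PredictFunction itself is not shown in this section. Below is a hedged
# sketch of what a scalar function compatible with the udf(...) call above
# might look like; the class body, the placeholder scorer, and the model_path
# handling are illustrative assumptions, not the actual implementation.
from pyflink.table.udf import ScalarFunction

class PredictFunction(ScalarFunction):
    """Hypothetical scalar function: maps a feature string to a label string."""

    def __init__(self, model_path):
        self._model_path = model_path  # None in the call above
        self._model = None

    def open(self, function_context):
        # Load the model once per parallel instance; a real version would
        # deserialize the trained model from self._model_path here.
        self._model = lambda feature_data: 'unknown'  # placeholder scorer

    def eval(self, feature_data):
        return self._model(feature_data)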
def execute(self, function_context: FlinkFunctionContext, input_list: List[Table]) -> List[Table]:
    t_env = function_context.get_table_env()
    table_0 = input_list[0]
    t_env.create_temporary_view('near_table', table_0)
    join_query = """select near_table.face_id, training_table.face_id
                    from training_table
                    inner join near_table
                    on training_table.uuid = near_table.near_id"""
    return [t_env.sql_query(join_query)]
def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
    table_env: TableEnvironment = function_context.get_table_env()
    statement_set = function_context.get_statement_set()
    table_env.execute_sql("""
        create table write_example (
            face_id varchar,
            device_id varchar,
            near_id int
        ) with (
            'connector' = 'kafka',
            'topic' = 'tianchi_write_example',
            'properties.bootstrap.servers' = 'localhost:9092',
            'properties.group.id' = 'write_example',
            'format' = 'csv',
            'scan.startup.mode' = 'earliest-offset',
            'csv.disable-quote-character' = 'true'
        )
    """)
    statement_set.add_insert('write_example', input_table)
def execute(self, function_context: FlinkFunctionContext) -> Table:
    table_env: TableEnvironment = function_context.get_table_env()
    table_env.execute_sql("""
        create table online_example (
            face_id varchar,
            device_id varchar,
            feature_data varchar
        ) with (
            'connector' = 'kafka',
            'topic' = 'tianchi_read_example',
            'properties.bootstrap.servers' = 'localhost:9092',
            'properties.group.id' = 'read_example',
            'format' = 'csv',
            'scan.startup.mode' = 'earliest-offset'
        )
    """)
    table = table_env.from_path('online_example')
    # Notify AIFlow to start sending online example messages.
    update_notification('source', function_context.node_spec.instance_id)
    return table
def submit_flink_job(exec_env, t_env: TableEnvironment, statement_set: StatementSet,
                     flink_job: LocalFlinkJob, graph: FlinkRunGraph):
    context = flink_job.job_context
    value_map = {}
    # Walk the run graph in order, feeding each executor the outputs of its
    # upstream nodes and caching its own outputs in value_map.
    for i in range(len(graph.nodes)):
        node = graph.nodes[i]
        function_context: FlinkFunctionContext = FlinkFunctionContext(
            exec_env=exec_env, t_env=t_env, statement_set=statement_set,
            node_spec=node, job_context=context)
        c: Union[Executor, SourceExecutor, SinkExecutor] = graph.executor_list[i]
        c.setup(function_context)
        if node.instance_id in graph.dependencies:
            ds = graph.dependencies[node.instance_id]
            params = []
            for d in ds:
                params.append(value_map[d.target_node_id][d.port])
            value_map[node.instance_id] = c.execute(function_context, params)
        else:
            value_map[node.instance_id] = c.execute(function_context, [])
        c.close(function_context)
    # Submit all registered inserts as one Flink job and record its job id so
    # the workflow can track the execution later.
    job_client = statement_set.execute().get_job_client()
    if job_client is not None:
        workflow_dir = '{}/temp/{}'.format(
            flink_job.job_config.project_path,
            str(flink_job.job_context.workflow_execution_id))
        os.makedirs(workflow_dir, exist_ok=True)
        with open('{}/{}'.format(workflow_dir, flink_job.instance_id), 'w') as f:
            logging.info(
                'workflow execution id: {}, job uuid: {}, Flink job id: {}'.format(
                    flink_job.job_context.workflow_execution_id,
                    flink_job.instance_id,
                    job_client.get_job_id()))
            f.write(str(job_client.get_job_id()))
        job_client.get_job_execution_result(user_class_loader=None).result()
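# submit_flink_job only assumes each executor exposes setup(), execute(), and
# close(), with execute() returning the list of tables that downstream nodes
# index via value_map[node][port]. A minimal hypothetical executor satisfying
# that contract (not one of the actual AIFlow base classes) could look like:
from typing import List
from pyflink.table import Table

class NoOpTransformer:
    """Hypothetical executor: passes its single upstream table through."""

    def setup(self, function_context):
        pass  # acquire resources, register UDFs, etc.

    def execute(self, function_context, input_list: List[Table]) -> List[Table]:
        return [input_list[0]]  # one output table at port 0

    def close(self, function_context):
        pass  # release resources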