def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
    """
    Declare a Kafka-backed sink table and route ``input_table`` into it.

    The sink ``write_predict_test_table`` (columns ``face_id`` and ``label``)
    is created with a DDL statement whose ``properties.bootstrap.servers``
    option is filled in from ``example_meta.stream_uri`` of the node spec.

    :param function_context: Supplies the node spec, the table environment and
                             the statement set used by this sink.
    :param input_table: The table whose rows are written to the sink.
    """
    sink_name = 'write_predict_test_table'

    example_meta: ExampleMeta = function_context.node_spec.example_meta
    table_env: TableEnvironment = function_context.get_table_env()
    statement_set = function_context.get_statement_set()

    # The pieces below concatenate to a single-line DDL statement; '{}' is the
    # placeholder for the Kafka bootstrap servers.
    sink_ddl_template = (
        " create table write_predict_test_table ("
        " face_id varchar,"
        " label varchar"
        " ) with ("
        " 'connector' = 'kafka',"
        " 'topic' = 'tianchi_write_example',"
        " 'properties.bootstrap.servers' = '{}',"
        " 'properties.group.id' = 'write_example',"
        " 'properties.request.timeout.ms' = '30000',"
        " 'format' = 'csv',"
        " 'scan.startup.mode' = 'earliest-offset',"
        " 'csv.disable-quote-character' = 'true'"
        " ) "
    )
    table_env.execute_sql(sink_ddl_template.format(example_meta.stream_uri))

    # To discard the output instead of writing to Kafka (presumably useful for
    # local testing), the same table can be declared with
    # 'connector' = 'blackhole' as its only option.

    # NOTE(review): the sink is registered twice - once through
    # Table.insert_into and once through the statement set. Depending on which
    # of the two the surrounding framework executes, one of these may be
    # redundant or rows may be written twice - confirm and drop one of them.
    input_table.insert_into(sink_name)
    statement_set.add_insert(sink_name, input_table)
def _from_file(self, filename, schema):
    """
    Build a :class:`Table` from a file through the Java bridge utilities.

    :param filename: Path of the file handed to
                     ``PythonBridgeUtils.createDataSetFromFile``.
    :param schema: The schema of the table; converted with ``_to_java_type``
                   before being passed to the Java side.
    :return: The result :class:`Table`.
    """
    jvm = get_gateway().jvm
    # The meaning of the trailing ``True`` flag is defined by
    # PythonBridgeUtils and is not visible from here.
    j_data_set = jvm.PythonBridgeUtils.createDataSetFromFile(
        self._j_tenv.execEnv(), filename, True)
    j_table = jvm.PythonTableUtils.fromDataSet(
        self._j_tenv, j_data_set, _to_java_type(schema))
    return Table(j_table)
def scan(self, *table_path):
    """
    Scans a registered table and returns the resulting :class:`Table`.

    The table must be registered in the TableEnvironment, either directly or
    as an external member of a :class:`pyflink.table.catalog.Catalog`.

    The rules for resolving the path are described in the documentation of
    :func:`~pyflink.table.TableEnvironment.use_database` and
    :func:`~pyflink.table.TableEnvironment.use_catalog`.

    Examples:

    Scanning a directly registered table
    ::

        >>> tab = t_env.scan("tableName")

    Scanning a table from a registered catalog
    ::

        >>> tab = t_env.scan("catalogName", "dbName", "tableName")

    :param table_path: The path of the table to scan.
    :throws: Exception if no table is found using the given table path.
    :return: The resulting :class:`Table`
    """
    j_string_class = get_gateway().jvm.String
    # The Java API takes the path segments as a String array.
    j_path_segments = utils.to_jarray(j_string_class, table_path)
    return Table(self._j_tenv.scan(j_path_segments))
def _from_elements(self, elements, schema):
    """
    Creates a table from a collection of elements.

    The elements are serialized to a temporary file which is then handed to
    the Java side by name. Both the temporary file and the temporary directory
    that holds it are removed before this method returns, whether it succeeds
    or raises.

    :param elements: The elements to create a table from.
    :param schema: The schema of the table; converted with ``_to_java_type``
                   and also passed to ``_get_execution_config``.
    :return: The result :class:`Table`.
    """
    # mkdtemp() leaves deletion of the directory to the caller, so keep its
    # path around and remove it once the file inside has been deleted.
    temp_dir = tempfile.mkdtemp()
    try:
        temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
        try:
            try:
                serializer = BatchedSerializer(self._serializer)
                serializer.dump_to_stream(elements, temp_file)
            finally:
                # Close before the file is read by name from the Java side.
                temp_file.close()
            row_type_info = _to_java_type(schema)
            execution_config = self._get_execution_config(
                temp_file.name, schema)
            gateway = get_gateway()
            j_objs = gateway.jvm.PythonBridgeUtils.readPythonObjects(
                temp_file.name, True)
            j_input_format = gateway.jvm.PythonTableUtils.getInputFormat(
                j_objs, row_type_info, execution_config)
            j_table_source = gateway.jvm.PythonInputFormatTableSource(
                j_input_format, row_type_info)
            return Table(self._j_tenv.fromTableSource(j_table_source))
        finally:
            os.unlink(temp_file.name)
    finally:
        # The directory is expected to be empty at this point.
        os.rmdir(temp_dir)
def from_path(self, path):
    """
    Reads a registered table and returns the resulting :class:`Table`.

    The table must be registered in the :class:`TableEnvironment`. The rules
    for resolving the path are described in the documentation of
    :func:`use_database` and :func:`use_catalog`.

    Examples:

    Reading a table from default catalog and database.
    ::

        >>> tab = table_env.from_path("tableName")

    Reading a table from a registered catalog.
    ::

        >>> tab = table_env.from_path("catalogName.dbName.tableName")

    Reading a table from a registered catalog with escaping. (`Table` is a
    reserved keyword). Dots in e.g. a database name also must be escaped.
    ::

        >>> tab = table_env.from_path("catalogName.`db.Name`.`Table`")

    :param path: The path of a table API object to scan.
    :return: Either a table or virtual table (=view).

    .. seealso:: :func:`use_catalog`
    .. seealso:: :func:`use_database`
    """
    # ``from`` is a reserved word in Python, so the Java method is looked up
    # by name instead of through attribute access.
    j_from = get_method(self._j_tenv, "from")
    return Table(j_from(path))
def from_table_source(self, table_source):
    """
    Creates a table from a table source.

    :param table_source: The table source used as table. Its wrapped Java
                         object (``_j_table_source``) is what gets passed to
                         the Java table environment.
    :return: The result table.
    """
    j_source = table_source._j_table_source
    j_table = self._j_tenv.fromTableSource(j_source)
    return Table(j_table)
def getOutputTable(self) -> 'Table':
    """
    Get the output table represented by this operator.

    The Java output table is wrapped together with the table environment
    selected by ``_choose_by_op_type``.

    :return: the output table.
    """
    # Imported locally rather than at module level.
    # noinspection PyProtectedMember
    from ....env import _mlenv

    # _mlenv is a 4-tuple; only the 2nd and 4th entries are needed here
    # (presumably the batch and stream table environments - confirm in env).
    _first, batch_tenv, _third, stream_tenv = _mlenv
    chosen_tenv = self._choose_by_op_type(batch_tenv, stream_tenv)
    j_output_table = self.get_j_obj().getOutputTable()
    return Table(j_output_table, chosen_tenv)
def _from_file(self, filename, schema):
    """
    Build a :class:`Table` from a file through the Java bridge utilities.

    :param filename: Path of the file handed to
                     ``PythonBridgeUtils.createDataSetFromFile``.
    :param schema: The schema of the table; converted with ``_to_java_type``
                   before being passed to the Java side.
    :return: The result :class:`Table`.
    :raises NotImplementedError: if the wrapped Java table environment is an
                                 instance of exactly ``TableEnvironmentImpl``.
    """
    jvm = get_gateway().jvm
    blink_t_env_class = get_java_class(
        jvm.org.apache.flink.table.api.internal.TableEnvironmentImpl)

    # Guard clause: this class of table environment is rejected up front. The
    # message names 'from_elements' (presumably the public entry point that
    # ends up here).
    if blink_t_env_class == self._j_tenv.getClass():
        raise NotImplementedError(
            "The operation 'from_elements' in batch mode is currently "
            "not supported when using blink planner.")

    j_data_set = jvm.PythonBridgeUtils.createDataSetFromFile(
        self._j_tenv.execEnv(), filename, True)
    j_table = jvm.PythonTableUtils.fromDataSet(
        self._j_tenv, j_data_set, _to_java_type(schema))
    return Table(j_table)
def from_table_source(self, table_source):
    """
    Creates a table from a table source.

    Example:
    ::

        >>> csv_table_source = CsvTableSource(
        ...     csv_file_path, ['a', 'b'], [DataTypes.STRING(), DataTypes.BIGINT()])
        >>> table_env.from_table_source(csv_table_source)

    :param table_source: The table source used as table. Its wrapped Java
                         object (``_j_table_source``) is what gets passed to
                         the Java table environment.
    :return: The result :class:`Table`.
    """
    j_source = table_source._j_table_source
    j_table = self._j_tenv.fromTableSource(j_source)
    return Table(j_table)
def sql_query(self, query):
    """
    Evaluates a SQL query on registered tables and retrieves the result as a
    :class:`Table`.

    Every table referenced by the query must be registered in the
    TableEnvironment.

    A :class:`Table` is automatically registered when its
    :func:`~Table.__str__` method is called, for example when it is embedded
    into a String. Hence, SQL queries can directly reference a
    :class:`Table` as follows:
    ::

        >>> table = ...
        # the table is not registered to the table environment
        >>> t_env.sql_query("SELECT * FROM %s" % table)

    :param query: The sql query string.
    :return: The result :class:`Table`.
    """
    return Table(self._j_tenv.sqlQuery(query))
def scan(self, *table_path):
    """
    Scans a registered table and returns the resulting :class:`Table`.

    The table must be registered in the TableEnvironment. It can be either
    directly registered as TableSource or Table.

    Examples:

    Scanning a directly registered table
    ::

        >>> tab = t_env.scan("tableName")

    Scanning a table from a registered catalog
    ::

        >>> tab = t_env.scan("catalogName", "dbName", "tableName")

    :param table_path: The path of the table to scan.
    :throws: Exception if no table is found using the given table path.
    :return: The resulting :class:`Table`
    """
    jvm = get_gateway().jvm
    # The Java API takes the path segments as a String array.
    j_segments = utils.to_jarray(jvm.String, table_path)
    j_scanned = self._j_tenv.scan(j_segments)
    return Table(j_scanned)
def execute_and_verify_transformed_table(self, table: Table):
    """
    Execute ``table`` and check every collected row.

    For each row, the first field plus one must equal the third field after
    conversion with ``int``.

    Note that the check passes vacuously when no rows are collected.

    :param table: The table to execute and verify.
    """
    with table.execute().collect() as results:
        for result in results:
            # assertEqual (rather than assertTrue on an ``==`` expression)
            # reports both values when the check fails.
            self.assertEqual(result[0] + 1, int(result[2]))
def sql(self, query):
    """
    Evaluates single sql statement including DDLs and DMLs.

    Note: Always use this interface to execute a sql query. It only supports
    to execute one sql statement a time.

    A DDL statement can be executed to create/drop a table/view.
    For example, the below DDL statement would create a CSV table named
    `tbl1` into the current catalog::

        create table tbl1(
            a int,
            b bigint,
            c varchar
        ) with (
            connector = 'csv',
            csv.path = 'xxx'
        )

    What is returned for the different kinds of statement:

        DDL: returns None.
        DML: a sql insert returns None; a sql query (select) returns a table
        describing the query data set, which can be further queried through
        the Table API, or directly written to a sink with
        :func:`Table.insert_into`.

    SQL queries can directly execute as follows:
    ::

        >>> source_ddl = \\
        ... '''
        ... create table sourceTable(
        ...     a int,
        ...     b varchar
        ... ) with (
        ...     connector = 'kafka',
        ...     kafka.topic = 'xxx',
        ...     kafka.endpoint = 'x.x.x'
        ... )
        ... '''

        >>> sink_ddl = \\
        ... '''
        ... create table sinkTable(
        ...     a int,
        ...     b varchar
        ... ) with (
        ...     connector = 'csv',
        ...     csv.path = 'xxx'
        ... )
        ... '''

        >>> query = "INSERT INTO sinkTable SELECT * FROM sourceTable"
        >>> table_env.sql(source_ddl)
        >>> table_env.sql(sink_ddl)
        >>> table_env.sql(query)
        >>> table_env.execute("MyJob")

    This code snippet creates a job to read data from Kafka source into a
    CSV sink.

    :param query: The SQL statement to evaluate.
    :return: A :class:`Table` wrapping the Java result, or None when the Java
             call returns no table.
    """
    j_result = self._j_tenv.sql(query)
    return None if j_result is None else Table(j_result)