def execute(self, function_context: FlinkFunctionContext,
            input_table: Table) -> None:
     """
     Declares a Kafka sink table and queues ``input_table`` for writing into it.

     The sink ``write_predict_test_table`` (columns ``face_id`` and ``label``, CSV format,
     topic ``tianchi_write_example``) is created through the table environment obtained
     from ``function_context``. The Kafka bootstrap servers are taken from
     ``function_context.node_spec.example_meta.stream_uri``.

     :param function_context: Context providing the example meta, the table environment and
                              the statement set.
     :param input_table: The table whose rows are written to the sink.
     """
     example_meta: ExampleMeta = function_context.node_spec.example_meta
     table_env: TableEnvironment = function_context.get_table_env()
     statement_set = function_context.get_statement_set()
     sink_ddl = """
            create table write_predict_test_table (
                 face_id varchar,
                 label varchar
             ) with (
                 'connector' = 'kafka',
                 'topic' = 'tianchi_write_example',
                 'properties.bootstrap.servers' = '{}',
                 'properties.group.id' = 'write_example',
                 'properties.request.timeout.ms' = '30000',
                 'format' = 'csv',
                 'scan.startup.mode' = 'earliest-offset',
                 'csv.disable-quote-character' = 'true'
             )
             """.format(example_meta.stream_uri)
     table_env.execute_sql(sink_ddl)
     # Register the insert exactly once, on the statement set. The deprecated
     # Table.insert_into() is intentionally not called as well: it would queue a second
     # write of the same rows into the same sink on the table environment.
     statement_set.add_insert('write_predict_test_table', input_table)
Beispiel #2
0
 def _from_file(self, filename, schema):
     """
     Creates a :class:`Table` from a file by going through a Java DataSet.

     :param filename: The file handed to ``PythonBridgeUtils.createDataSetFromFile``.
     :param schema: The schema, converted with ``_to_java_type`` before it is passed to Java.
     :return: The result :class:`Table`.
     """
     jvm = get_gateway().jvm
     # NOTE(review): the meaning of the trailing ``True`` flag is defined on the Java side.
     j_data_set = jvm.PythonBridgeUtils.createDataSetFromFile(
         self._j_tenv.execEnv(), filename, True)
     j_table = jvm.PythonTableUtils.fromDataSet(
         self._j_tenv, j_data_set, _to_java_type(schema))
     return Table(j_table)
Beispiel #3
0
    def scan(self, *table_path):
        """
        Looks up a registered table by its path and returns it as a :class:`Table`.

        The table must be registered in the TableEnvironment, either directly or as an
        external member of a :class:`pyflink.table.catalog.Catalog`.

        The path resolution rules are described in the documentation of
        :func:`~pyflink.table.TableEnvironment.use_database` and
        :func:`~pyflink.table.TableEnvironment.use_catalog`.

        Examples:

        Scanning a directly registered table
        ::

            >>> tab = t_env.scan("tableName")

        Scanning a table from a registered catalog
        ::

            >>> tab = t_env.scan("catalogName", "dbName", "tableName")

        :param table_path: The path of the table to scan.
        :throws: Exception if no table is found using the given table path.
        :return: The resulting :class:`Table`
        """
        # The Java method takes a String array, so the varargs are converted first.
        j_path_array = utils.to_jarray(get_gateway().jvm.String, table_path)
        return Table(self._j_tenv.scan(j_path_array))
Beispiel #4
0
    def _from_elements(self, elements, schema):
        """
        Creates a table from a collection of elements.

        The elements are serialized into a temporary file which is then read on the Java
        side. Both the temporary file and the temporary directory holding it are removed
        before this method returns.

        :param elements: The elements to create a table from.
        :param schema: The schema of the elements; it is converted with ``_to_java_type`` and
                       also passed to ``_get_execution_config``.
        :return: The result :class:`Table`.
        """
        serializer = BatchedSerializer(self._serializer)
        # Keep a handle on the directory: mkdtemp() leaves its removal to the caller.
        temp_dir = tempfile.mkdtemp()
        try:
            # serializes to a file, and we read the file in java
            temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
            try:
                try:
                    serializer.dump_to_stream(elements, temp_file)
                finally:
                    temp_file.close()
                row_type_info = _to_java_type(schema)
                execution_config = self._get_execution_config(
                    temp_file.name, schema)
                gateway = get_gateway()
                j_objs = gateway.jvm.PythonBridgeUtils.readPythonObjects(
                    temp_file.name, True)
                j_input_format = gateway.jvm.PythonTableUtils.getInputFormat(
                    j_objs, row_type_info, execution_config)
                j_table_source = gateway.jvm.PythonInputFormatTableSource(
                    j_input_format, row_type_info)

                return Table(self._j_tenv.fromTableSource(j_table_source))
            finally:
                os.unlink(temp_file.name)
        finally:
            # The directory is empty at this point (its only file was unlinked above).
            os.rmdir(temp_dir)
Beispiel #5
0
    def from_path(self, path):
        """
        Reads a registered table and returns the resulting :class:`Table`.

        The table must already be registered in the :class:`TableEnvironment`.

        The path resolution rules are described in the documentation of :func:`use_database`
        and :func:`use_catalog`.

        Examples:

        Reading a table from default catalog and database.
        ::

            >>> tab = table_env.from_path("tableName")

        Reading a table from a registered catalog.
        ::

            >>> tab = table_env.from_path("catalogName.dbName.tableName")

        Reading a table from a registered catalog with escaping. (`Table` is a reserved keyword).
        Dots in e.g. a database name also must be escaped.
        ::

            >>> tab = table_env.from_path("catalogName.`db.Name`.`Table`")

        :param path: The path of a table API object to scan.
        :return: Either a table or virtual table (=view).

        .. seealso:: :func:`use_catalog`
        .. seealso:: :func:`use_database`
        """
        # ``from`` is a Python keyword, so the Java method cannot be written as a plain
        # attribute access and is looked up by name instead.
        j_from = get_method(self._j_tenv, "from")
        return Table(j_from(path))
Beispiel #6
0
    def from_table_source(self, table_source):
        """
        Wraps the given table source into a table.

        :param table_source: The table source used as table; its ``_j_table_source`` is handed
                             to the Java table environment.
        :return: The result table.
        """
        j_source = table_source._j_table_source
        j_table = self._j_tenv.fromTableSource(j_source)
        return Table(j_table)
Beispiel #7
0
    def getOutputTable(self) -> 'Table':
        """
        Returns the output table of this operator as a :class:`Table`.

        The Java output table is paired with one of the two table environments held in
        ``_mlenv``; which one is decided by ``_choose_by_op_type``.

        :return: the output table.
        """
        # noinspection PyProtectedMember
        from ....env import _mlenv
        # Only the second and fourth entries of ``_mlenv`` are used here.
        _unused_first, batch_tenv, _unused_third, stream_tenv = _mlenv
        chosen_tenv = self._choose_by_op_type(batch_tenv, stream_tenv)
        j_output_table = self.get_j_obj().getOutputTable()
        return Table(j_output_table, chosen_tenv)
Beispiel #8
0
 def _from_file(self, filename, schema):
     """
     Creates a :class:`Table` from a file by going through a Java DataSet.

     :param filename: The file handed to ``PythonBridgeUtils.createDataSetFromFile``.
     :param schema: The schema, converted with ``_to_java_type`` before it is passed to Java.
     :return: The result :class:`Table`.
     :raises NotImplementedError: If the wrapped Java table environment is exactly a
                                  ``TableEnvironmentImpl``.
     """
     jvm = get_gateway().jvm
     blink_t_env_class = get_java_class(
         jvm.org.apache.flink.table.api.internal.TableEnvironmentImpl)
     # Guard clause: reject the unsupported environment before touching the file.
     if blink_t_env_class == self._j_tenv.getClass():
         raise NotImplementedError(
             "The operation 'from_elements' in batch mode is currently "
             "not supported when using blink planner.")

     j_data_set = jvm.PythonBridgeUtils.createDataSetFromFile(
         self._j_tenv.execEnv(), filename, True)
     j_table = jvm.PythonTableUtils.fromDataSet(
         self._j_tenv, j_data_set, _to_java_type(schema))
     return Table(j_table)
Beispiel #9
0
    def from_table_source(self, table_source):
        """
        Wraps the given table source into a :class:`Table`.

        Example:
        ::

            >>> csv_table_source = CsvTableSource(
            ...     csv_file_path, ['a', 'b'], [DataTypes.STRING(), DataTypes.BIGINT()])
            >>> table_env.from_table_source(csv_table_source)

        :param table_source: The table source used as table; its ``_j_table_source`` is handed
                             to the Java table environment.
        :return: The result :class:`Table`.
        """
        j_source = table_source._j_table_source
        j_table = self._j_tenv.fromTableSource(j_source)
        return Table(j_table)
Beispiel #10
0
    def sql_query(self, query):
        """
        Runs a SQL query against registered tables and returns the result as a :class:`Table`.

        Every table referenced by the query must be registered in the TableEnvironment.

        A :class:`Table` is automatically registered when its :func:`~Table.__str__` method is
        called, for example when it is embedded into a String.

        Hence, SQL queries can directly reference a :class:`Table` as follows:
        ::

            >>> table = ...
            # the table is not registered to the table environment
            >>> t_env.sql_query("SELECT * FROM %s" % table)

        :param query: The sql query string.
        :return: The result :class:`Table`.
        """
        return Table(self._j_tenv.sqlQuery(query))
Beispiel #11
0
    def scan(self, *table_path):
        """
        Looks up a registered table by its path and returns it as a :class:`Table`.

        The table must be registered in the TableEnvironment, either directly as a
        TableSource or as a Table.

        Examples:

        Scanning a directly registered table
        ::

            >>> tab = t_env.scan("tableName")

        Scanning a table from a registered catalog
        ::

            >>> tab = t_env.scan("catalogName", "dbName", "tableName")

        :param table_path: The path of the table to scan.
        :throws: Exception if no table is found using the given table path.
        :return: The resulting :class:`Table`
        """
        # The Java method takes a String array, so the varargs are converted first.
        j_path_array = utils.to_jarray(get_gateway().jvm.String, table_path)
        return Table(self._j_tenv.scan(j_path_array))
 def execute_and_verify_transformed_table(self, table: Table):
     """
     Executes ``table`` and verifies every collected row.

     For each row, the first field plus one must equal the third field converted with
     ``int()``.

     :param table: The table to execute and check.
     """
     with table.execute().collect() as results:
         for result in results:
             # assertEqual reports both values on failure, unlike assertTrue(a == b).
             self.assertEqual(result[0] + 1, int(result[2]))
Beispiel #13
0
    def sql(self, query):
        """
        Evaluates a single sql statement, which may be a DDL or a DML.

        Note: Always use this interface to execute a sql query. Only one sql statement
        can be executed per call.

        A DDL statement can be executed to create/drop a table/view:
        For example, the below DDL statement would create a CSV table named `tbl1`
        in the current catalog::

            create table tbl1(
                a int,
                b bigint,
                c varchar
            ) with (
                connector = 'csv',
                csv.path = 'xxx'
            )

        What is returned depends on the kind of statement:

        DDL: returns None.

        DML: a sql insert returns None; a sql query(select) returns a table
        describing the query data set, which can be further queried through the Table API,
        or directly written to a sink with :func:`Table.insert_into`.

        SQL queries can be executed directly as follows:
        ::

            >>> source_ddl = \\
            ... '''
            ... create table sourceTable(
            ...     a int,
            ...     b varchar
            ... ) with (
            ...     connector = 'kafka',
            ...     kafka.topic = 'xxx',
            ...     kafka.endpoint = 'x.x.x'
            ... )
            ... '''

            >>> sink_ddl = \\
            ... '''
            ... create table sinkTable(
            ...     a int,
            ...     b varchar
            ... ) with (
            ...     connector = 'csv',
            ...     csv.path = 'xxx'
            ... )
            ... '''

            >>> query = "INSERT INTO sinkTable SELECT * FROM sourceTable"
            >>> table_env.sql(source_ddl)
            >>> table_env.sql(sink_ddl)
            >>> table_env.sql(query)
            >>> table_env.execute("MyJob")

        This code snippet creates a job to read data from Kafka source into a CSV sink.

        :param query: The SQL statement to evaluate.
        :return: A :class:`Table` when the Java call yields a table, otherwise None.
        """
        j_result = self._j_tenv.sql(query)
        # The Java side hands back nothing for statements that produce no table.
        return None if j_result is None else Table(j_result)