예제 #1
0
 def __init__(self,
              field_names,
              field_types,
              path,
              field_delimiter=',',
              num_files=1,
              write_mode=None):
     # type: (list[str], list[DataType], str, str, int, int) -> None
     """
     Creates the Java ``CsvTableSink`` and configures it with the given schema.

     :param field_names: Names of the sink's fields; converted to a Java ``String`` array.
     :param field_types: Types of the sink's fields; each one is converted with
                         ``_to_java_type`` before it is handed to the JVM.
     :param path: Passed as the first argument of the Java ``CsvTableSink`` constructor.
     :param field_delimiter: Wrapped in a ``scala.Option`` and passed to the constructor.
     :param num_files: Wrapped in a ``scala.Option`` and passed to the constructor.
     :param write_mode: ``WriteMode.NO_OVERWRITE``, ``WriteMode.OVERWRITE`` or ``None``;
                        ``None`` is passed on as an empty ``scala.Option``.
     :raises Exception: If ``write_mode`` is none of the supported values.
     """
     jvm = get_gateway().jvm

     # Resolve the Java enum constant first; wrapping into an Option happens below.
     if write_mode == WriteMode.NO_OVERWRITE:
         j_mode = jvm.org.apache.flink.core.fs.FileSystem.WriteMode.NO_OVERWRITE
     elif write_mode == WriteMode.OVERWRITE:
         j_mode = jvm.org.apache.flink.core.fs.FileSystem.WriteMode.OVERWRITE
     elif write_mode is None:
         j_mode = None
     else:
         raise Exception('Unsupported write_mode: %s' % write_mode)

     j_option = jvm.scala.Option
     j_write_mode = j_option.empty() if j_mode is None else j_option.apply(j_mode)
     j_delimiter_opt = j_option.apply(field_delimiter)
     j_num_files_opt = j_option.apply(num_files)
     j_sink = jvm.CsvTableSink(path, j_delimiter_opt, j_num_files_opt, j_write_mode)

     j_names = utils.to_jarray(jvm.String, field_names)
     j_types = utils.to_jarray(jvm.TypeInformation,
                               list(map(_to_java_type, field_types)))
     # configure() returns the sink instance that carries the schema.
     super(CsvTableSink, self).__init__(j_sink.configure(j_names, j_types))
예제 #2
0
def call(f: Union[str, UserDefinedFunctionWrapper], *args) -> Expression:
    """
    Creates a call expression for a catalog function or a Python user-defined function.

    When `f` is a str, it is the path of a function that will be looked up in a catalog.
    There are two kinds of such functions:

        - System functions - identified with one part names
        - Catalog functions - always identified with three parts names
            (catalog, database, function)

    Each of them can additionally be either temporary or permanent (stored in an external
    catalog). The resulting resolution order for a function path is:

        - Temporary system function
        - System function
        - Temporary catalog function
        - Catalog function

    When `f` is not a str, it is treated as a Python user-defined function wrapper and the
    call is built by reflectively invoking ``Expressions.apiCall`` on the Java side.

    :param f: the path of the function or the Python user-defined function.
    :param args: parameters of the user-defined function.
    """
    gateway = get_gateway()

    def _java_args():
        # Converted lazily so the conversion happens at the point of the actual call.
        return to_jarray(gateway.jvm.Object, list(map(_get_java_expression, args)))

    if isinstance(f, str):
        return Expression(gateway.jvm.Expressions.call(f, _java_args()))

    def _definition_for(udf):
        if not isinstance(udf, UserDefinedTableFunctionWrapper):
            return udf.java_user_defined_function()
        # TypeInference was not supported for TableFunction in the old planner. Use
        # TableFunctionDefinition to work around this issue.
        j_field_types = to_jarray(gateway.jvm.TypeInformation,
                                  [_to_java_type(t) for t in udf._result_types])
        j_row_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(j_field_types)
        return gateway.jvm.org.apache.flink.table.functions.TableFunctionDefinition(
            'f', udf.java_user_defined_function(), j_row_type)

    j_expressions_class = load_java_class("org.apache.flink.table.api.Expressions")
    j_definition_class = load_java_class('org.apache.flink.table.functions.FunctionDefinition')
    j_object_array_class = to_jarray(gateway.jvm.Object, []).getClass()

    # Look up apiCall(FunctionDefinition, Object[]) and make it accessible before invoking it.
    j_parameter_types = to_jarray(gateway.jvm.Class, [j_definition_class, j_object_array_class])
    j_api_call = j_expressions_class.getDeclaredMethod("apiCall", j_parameter_types)
    j_api_call.setAccessible(True)

    j_definition = _definition_for(f)
    j_invoke_args = to_jarray(gateway.jvm.Object, [j_definition, _java_args()])
    # The receiver is None: the method is invoked without an instance.
    return Expression(j_api_call.invoke(None, j_invoke_args))
예제 #3
0
파일: udf.py 프로젝트: zhuzhurk/flink
    def _create_judtf(self):
        """
        Builds the Java ``PythonTableFunction`` for the wrapped Python function.

        A function that is not a :class:`UserDefinedFunction` is first wrapped into a
        ``DelegationTableFunction``; the result is serialized with ``cloudpickle`` and
        passed to the Java constructor as a byte array.

        :return: The created Java ``PythonTableFunction`` object.
        """
        if isinstance(self._func, UserDefinedFunction):
            func = self._func
        else:
            func = DelegationTableFunction(self._func)

        import cloudpickle
        pickled = cloudpickle.dumps(func)

        jvm = get_gateway().jvm

        # Input types are optional; None is passed through to the Java constructor.
        j_input_types = None
        if self._input_types is not None:
            j_input_types = utils.to_jarray(
                jvm.TypeInformation, [_to_java_type(t) for t in self._input_types])

        # The result types are combined into a single RowTypeInfo.
        j_row_type = jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(
            utils.to_jarray(jvm.TypeInformation,
                            [_to_java_type(t) for t in self._result_types]))

        j_kind = jvm.org.apache.flink.table.functions.python.PythonFunctionKind.GENERAL
        j_table_function_class = jvm.org.apache.flink.table.functions.python.PythonTableFunction
        return j_table_function_class(self._name,
                                      bytearray(pickled),
                                      j_input_types,
                                      j_row_type,
                                      j_kind,
                                      self._deterministic,
                                      _get_python_env())
예제 #4
0
 def __init__(self,
              hostname=None,
              port=None,
              line_delimiter=None,
              field_delimiter=None,
              field_names=None,
              field_types=None,
              append_proctime=None):
     """
     Creates a Java ``SocketTableSource`` through its builder.

     Every argument is optional; only arguments that are not ``None`` are forwarded to the
     builder. The schema is set only when both ``field_names`` and ``field_types`` are given.

     :param hostname: Forwarded to ``withHostname``.
     :param port: Forwarded to ``withPort``.
     :param line_delimiter: Forwarded to ``withLineDelimiter``.
     :param field_delimiter: Forwarded to ``withFieldDelimiter``.
     :param field_names: Field names of the schema.
     :param field_types: Field types of the schema; converted with ``_to_java_type``.
     :param append_proctime: Forwarded to ``appendProctime``.
     """
     jvm = get_gateway().jvm
     j_builder = jvm.org.apache.flink.python.connector.SocketTableSource.Builder()

     # Plain one-argument builder options, applied in declaration order.
     plain_options = (
         ("withHostname", hostname),
         ("withPort", port),
         ("withLineDelimiter", line_delimiter),
         ("withFieldDelimiter", field_delimiter),
     )
     for setter_name, value in plain_options:
         if value is not None:
             getattr(j_builder, setter_name)(value)

     if not (field_names is None or field_types is None):
         j_names = to_jarray(jvm.String, field_names)
         j_types = to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
         j_builder.withSchema(j_names, j_types)

     if append_proctime is not None:
         j_builder.appendProctime(append_proctime)

     super(SocketTableSource, self).__init__(j_builder.build())
예제 #5
0
 def __init__(self,
              field_names,
              field_types,
              path,
              field_delimiter=',',
              num_files=-1,
              write_mode=None):
     """
     Creates the Java ``CsvTableSink`` and configures it with the given schema.

     :param field_names: Names of the sink's fields; converted to a Java ``String`` array.
     :param field_types: Types of the sink's fields; converted with ``_to_java_type``.
     :param path: Passed as the first argument of the Java ``CsvTableSink`` constructor.
     :param field_delimiter: Passed to the Java constructor unchanged.
     :param num_files: Passed to the Java constructor unchanged.
     :param write_mode: ``WriteMode.NO_OVERWRITE``, ``WriteMode.OVERWRITE`` or ``None``;
                        ``None`` is handed to the Java constructor as ``None``.
     :raises Exception: If ``write_mode`` is none of the supported values.
     """
     gateway = get_gateway()

     def _j_write_mode(constant_name):
         # Resolved lazily so that only the selected enum constant is looked up.
         return getattr(gateway.jvm.org.apache.flink.core.fs.FileSystem.WriteMode,
                        constant_name)

     if write_mode == WriteMode.NO_OVERWRITE:
         j_mode = _j_write_mode("NO_OVERWRITE")
     elif write_mode == WriteMode.OVERWRITE:
         j_mode = _j_write_mode("OVERWRITE")
     elif write_mode is None:
         j_mode = None
     else:
         raise Exception('Unsupported write_mode: %s' % write_mode)

     j_sink = gateway.jvm.CsvTableSink(path, field_delimiter, num_files, j_mode)
     j_names = utils.to_jarray(gateway.jvm.String, field_names)
     j_types = utils.to_jarray(gateway.jvm.TypeInformation,
                               list(map(_to_java_type, field_types)))
     # configure() returns the sink instance that carries the schema.
     super(CsvTableSink, self).__init__(j_sink.configure(j_names, j_types))
 def __init__(self, j_table_sink, field_names, field_types):
     """
     Wraps a Java table sink after configuring it with the given schema.

     :param j_table_sink: The Java table sink whose ``configure`` method is called.
     :param field_names: Field names; converted to a Java ``String`` array.
     :param field_types: Field types; converted with ``_to_java_type``.
     """
     jvm = get_gateway().jvm
     j_names = utils.to_jarray(jvm.String, field_names)
     j_types = utils.to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
     # The object returned by configure() is the one handed to the base class.
     j_configured_sink = j_table_sink.configure(j_names, j_types)
     super(TestTableSink, self).__init__(j_configured_sink)
예제 #7
0
 def __init__(self, source_path, field_names, field_types):
     # type: (str, list[str], list[DataType]) -> None
     """
     Creates the Java ``CsvTableSource`` for the given path and schema.

     :param source_path: Passed as the first argument of the Java constructor.
     :param field_names: Field names; converted to a Java ``String`` array.
     :param field_types: Field types; converted with ``_to_java_type``.
     """
     jvm = get_gateway().jvm
     j_names = utils.to_jarray(jvm.String, field_names)
     j_types = utils.to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
     j_source = jvm.CsvTableSource(source_path, j_names, j_types)
     super(CsvTableSource, self).__init__(j_source)
예제 #8
0
 def __init__(self, field_names, field_types, out_row=100000):
     """
     Creates the Java ``com.alibaba.flink.sink.PrintTableSink`` for the given schema.

     :param field_names: Field names; converted to a Java ``String`` array.
     :param field_types: Field types; converted with ``_to_java_type``.
     :param out_row: Passed as the third argument of the Java constructor.
     """
     jvm = get_gateway().jvm
     j_names = utils.to_jarray(jvm.String, field_names)
     j_types = utils.to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
     j_sink = jvm.com.alibaba.flink.sink.PrintTableSink(j_names, j_types, out_row)
     super(PrintTableSink, self).__init__(j_sink)
예제 #9
0
 def __init__(self, source_path, field_names, field_types):
     # type: (str, list[str], list[DataType]) -> None
     """
     Creates the Java ``CsvTableSource`` for the given path and schema.

     :param source_path: Passed as the first argument of the Java constructor.
     :param field_names: Field names; converted to a Java ``String`` array.
     :param field_types: Field types; converted with ``type_utils.to_java_type``.
     """
     jvm = get_gateway().jvm
     j_names = utils.to_jarray(jvm.String, field_names)
     j_types = utils.to_jarray(jvm.TypeInformation,
                               list(map(type_utils.to_java_type, field_types)))
     j_source = jvm.CsvTableSource(source_path, j_names, j_types)
     super(CsvTableSource, self).__init__(j_source)
예제 #10
0
 def __init__(self, field_names=None, data_types=None, j_table_schema=None):
     """
     Wraps an existing Java ``TableSchema`` or builds a new one.

     :param field_names: Field names; only used when ``j_table_schema`` is ``None``.
     :param data_types: Field types, converted with ``_to_java_type``; only used when
                        ``j_table_schema`` is ``None``.
     :param j_table_schema: An existing Java ``TableSchema``; stored as-is when given.
     """
     if j_table_schema is not None:
         self._j_table_schema = j_table_schema
         return

     jvm = get_gateway().jvm
     j_names = to_jarray(jvm.String, field_names)
     j_types = to_jarray(jvm.TypeInformation, list(map(_to_java_type, data_types)))
     self._j_table_schema = jvm.TableSchema(j_names, j_types)
예제 #11
0
 def __init__(self, field_names, field_types):
     """
     Creates the Java ``org.apache.flink.python.connector.PrintTableSink`` and configures
     it with the given schema.

     :param field_names: Field names; converted to a Java ``String`` array.
     :param field_types: Field types; converted with ``_to_java_type``.
     """
     jvm = get_gateway().jvm
     j_sink = jvm.org.apache.flink.python.connector.PrintTableSink()
     j_names = to_jarray(jvm.String, field_names)
     j_types = to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
     # configure() returns the sink instance that carries the schema.
     super(PrintTableSink, self).__init__(j_sink.configure(j_names, j_types))
예제 #12
0
def call(f: Union[str, UserDefinedFunctionWrapper], *args) -> Expression:
    """
    Creates a call expression for a catalog function or a Python user-defined function.

    When `f` is a str, it is the path of a function that will be looked up in a catalog.
    There are two kinds of such functions:

        - System functions - identified with one part names
        - Catalog functions - always identified with three parts names
            (catalog, database, function)

    Each of them can additionally be either temporary or permanent (stored in an external
    catalog). The resulting resolution order for a function path is:

        - Temporary system function
        - System function
        - Temporary catalog function
        - Catalog function

    When `f` is not a str, its ``java_user_defined_function()`` is passed to the Java call.

    :param f: the path of the function or the Python user-defined function.
    :param args: parameters of the user-defined function.
    """
    gateway = get_gateway()
    j_call = gateway.jvm.Expressions.call

    if isinstance(f, str):
        j_target = f
    else:
        j_target = f.java_user_defined_function()

    j_args = to_jarray(gateway.jvm.Object, list(map(_get_java_expression, args)))
    return Expression(j_call(j_target, j_args))
예제 #13
0
    def _create_judf(self):
        """
        Builds the Java ``PythonScalarFunction`` for the wrapped Python function.

        A function that is not a :class:`UserDefinedFunction` is first wrapped into a
        ``DelegatingScalarFunction``; the result is serialized with ``cloudpickle`` and
        passed to the Java constructor as a byte array.

        :return: The created Java ``PythonScalarFunction`` object.
        :raises TypeError: If ``self._udf_type`` is neither ``"general"`` nor ``"pandas"``.
        """
        jvm = get_gateway().jvm

        if isinstance(self._func, UserDefinedFunction):
            func = self._func
        else:
            func = DelegatingScalarFunction(self._func)

        import cloudpickle
        pickled = cloudpickle.dumps(func)

        j_input_types = utils.to_jarray(
            jvm.TypeInformation, list(map(_to_java_type, self._input_types)))
        j_result_type = _to_java_type(self._result_type)

        # Map the Python-side udf type onto the Java PythonFunctionKind enum.
        j_kinds = jvm.org.apache.flink.table.functions.python.PythonFunctionKind
        udf_type = self._udf_type
        if udf_type == "general":
            j_kind = j_kinds.GENERAL
        elif udf_type == "pandas":
            j_kind = j_kinds.PANDAS
        else:
            raise TypeError("Unsupported udf_type: %s." % udf_type)

        j_scalar_function_class = \
            jvm.org.apache.flink.table.functions.python.PythonScalarFunction
        return j_scalar_function_class(self._name,
                                       bytearray(pickled),
                                       j_input_types,
                                       j_result_type,
                                       j_kind,
                                       self._deterministic,
                                       _get_python_env())
예제 #14
0
    def over_window(self, *over_windows):
        """
        Defines over-windows on the records of a table.

        For each record, an over-window defines an interval of records over which
        aggregation functions can be computed.

        Example:
        ::

            >>> table.over_window(Over.partition_by("c").order_by("rowTime") \\
            ...     .preceding("10.seconds").alias("ow")) \\
            ...     .select("c, b.count over ow, e.sum over ow")

        .. note::

            Computing over window aggregates on a streaming table is only a parallel
            operation if the window is partitioned. Otherwise, the whole stream will be
            processed by a single task, i.e., with parallelism 1.

        .. note::

            Over-windows for batch tables are currently not supported.

        :param over_windows: over windows created from :class:`~pyflink.table.window.Over`.
        :type over_windows: pyflink.table.window.OverWindow
        :return: A over windowed table.
        :rtype: pyflink.table.OverWindowedTable
        """
        j_over_windows = [window._java_over_window for window in over_windows]
        j_window_array = to_jarray(get_gateway().jvm.OverWindow, j_over_windows)
        j_windowed_table = self._j_table.window(j_window_array)
        return OverWindowedTable(j_windowed_table, self._t_env)
예제 #15
0
    def java_user_defined_function(self):
        """
        Returns the Java counterpart of this user-defined function, creating it on first use.

        The created object is cached in ``self._judf_placeholder`` and returned directly on
        subsequent calls.

        :return: The object returned by ``self._create_judf``.
        :raises TypeError: If ``self._func_type`` is neither ``"general"`` nor ``"pandas"``.
        """
        if self._judf_placeholder is not None:
            return self._judf_placeholder

        jvm = get_gateway().jvm

        # Input types are optional; None is passed through to _create_judf.
        j_input_types = None
        if self._input_types is not None:
            j_input_types = utils.to_jarray(
                jvm.TypeInformation, list(map(_to_java_type, self._input_types)))

        # Map the Python-side function type onto the Java PythonFunctionKind enum.
        j_kinds = jvm.org.apache.flink.table.functions.python.PythonFunctionKind
        if self._func_type == "general":
            j_kind = j_kinds.GENERAL
        elif self._func_type == "pandas":
            j_kind = j_kinds.PANDAS
        else:
            raise TypeError("Unsupported func_type: %s." %
                            self._func_type)

        if isinstance(self._func, UserDefinedFunction):
            func = self._func
        else:
            func = self._create_delegate_function()

        import cloudpickle
        pickled = cloudpickle.dumps(func)
        self._judf_placeholder = self._create_judf(pickled, j_input_types, j_kind)
        return self._judf_placeholder
예제 #16
0
파일: table.py 프로젝트: zlzhangv007/flink
    def over_window(self, *over_windows: OverWindow) -> 'OverWindowedTable':
        """
        Defines over-windows on the records of a table.

        For each record, an over-window defines an interval of records over which
        aggregation functions can be computed.

        Example:
        ::

            >>> from pyflink.table.expressions import col, lit
            >>> tab.over_window(Over.partition_by(tab.c).order_by(tab.rowtime) \\
            ...     .preceding(lit(10).seconds).alias("ow")) \\
            ...     .select(tab.c, tab.b.count.over(col('ow')), tab.e.sum.over(col('ow')))

        .. note::

            Computing over window aggregates on a streaming table is only a parallel
            operation if the window is partitioned. Otherwise, the whole stream will be
            processed by a single task, i.e., with parallelism 1.

        .. note::

            Over-windows for batch tables are currently not supported.

        :param over_windows: over windows created from :class:`~pyflink.table.window.Over`.
        :return: A over windowed table.
        """
        j_over_windows = [window._java_over_window for window in over_windows]
        j_window_array = to_jarray(get_gateway().jvm.OverWindow, j_over_windows)
        j_windowed_table = self._j_table.window(j_window_array)
        return OverWindowedTable(j_windowed_table, self._t_env)
예제 #17
0
    def scan(self, *table_path):
        """
        Scans a registered table and returns the resulting :class:`Table`.

        The table must be registered in the TableEnvironment, either directly or as an
        external member of a :class:`pyflink.table.catalog.Catalog`.

        See the documentation of :func:`~pyflink.table.TableEnvironment.use_database` or
        :func:`~pyflink.table.TableEnvironment.use_catalog` for the rules on the path
        resolution.

        Examples:

        Scanning a directly registered table
        ::

            >>> tab = t_env.scan("tableName")

        Scanning a table from a registered catalog
        ::

            >>> tab = t_env.scan("catalogName", "dbName", "tableName")

        :param table_path: The path of the table to scan.
        :throws: Exception if no table is found using the given table path.
        :return: The resulting :class:`Table`
        """
        # The path components are passed to the Java scan() as a String array.
        j_path = utils.to_jarray(get_gateway().jvm.String, table_path)
        return Table(self._j_tenv.scan(j_path))
예제 #18
0
    def _create_judf(self, is_blink_planner, table_config):
        """
        Builds the Java scalar function for the wrapped Python function.

        A function that is not a :class:`UserDefinedFunction` is first wrapped into a
        ``DelegatingScalarFunction``; the result is serialized with ``cloudpickle`` and
        passed to the Java side as a byte array.

        :param is_blink_planner: Selects which ``PythonTableUtils`` class creates the function.
        :param table_config: Only passed on when ``is_blink_planner`` is true.
        :return: The object returned by ``createPythonScalarFunction``.
        """
        if isinstance(self._func, UserDefinedFunction):
            func = self._func
        else:
            func = DelegatingScalarFunction(self._func)

        import cloudpickle
        pickled = cloudpickle.dumps(func)

        jvm = get_gateway().jvm
        j_input_types = utils.to_jarray(
            jvm.TypeInformation, list(map(_to_java_type, self._input_types)))
        j_result_type = _to_java_type(self._result_type)

        if is_blink_planner:
            # The planner-specific factory takes the table config as an extra first argument.
            j_utils = jvm.org.apache.flink.table.planner.utils.python.PythonTableUtils
            return j_utils.createPythonScalarFunction(table_config,
                                                      self._name,
                                                      bytearray(pickled),
                                                      j_input_types,
                                                      j_result_type,
                                                      self._deterministic,
                                                      _get_python_env())

        j_utils = jvm.PythonTableUtils
        return j_utils.createPythonScalarFunction(self._name,
                                                  bytearray(pickled),
                                                  j_input_types,
                                                  j_result_type,
                                                  self._deterministic,
                                                  _get_python_env())
예제 #19
0
파일: udf.py 프로젝트: zfq314/flink-1
    def _create_judf(self, serialized_func, j_input_types, j_function_kind):
        """
        Builds the Java aggregate function for the wrapped Python function.

        Depending on ``self._is_table_aggregate`` either a ``PythonTableAggregateFunction``
        or a ``PythonAggregateFunction`` is instantiated.

        For a ``"pandas"`` function, ``self._accumulator_type`` is overwritten with an array
        type of the result type before the Java types are derived.

        :param serialized_func: The serialized Python function; passed on as a byte array.
        :param j_input_types: Only checked against ``None``. When it is not ``None``, the
                              input types are rebuilt from ``self._input_types`` as Java
                              ``DataType`` objects; otherwise ``None`` is passed on.
        :param j_function_kind: The Java function kind; passed through unchanged.
        :return: The created Java aggregate function object.
        """
        if self._func_type == "pandas":
            from pyflink.table.types import DataTypes
            self._accumulator_type = DataTypes.ARRAY(self._result_type)

        # Single gateway lookup, shared by the type conversion and the class resolution
        # below (assumes get_gateway() returns the same gateway on every call).
        gateway = get_gateway()

        if j_input_types is not None:
            # The incoming array is discarded on purpose: this function needs DataType
            # instances, so they are derived again from the Python-side input types.
            j_input_types = utils.to_jarray(
                gateway.jvm.DataType, [_to_java_data_type(i) for i in self._input_types])
        j_result_type = _to_java_data_type(self._result_type)
        j_accumulator_type = _to_java_data_type(self._accumulator_type)

        if self._is_table_aggregate:
            PythonAggregateFunction = gateway.jvm \
                .org.apache.flink.table.functions.python.PythonTableAggregateFunction
        else:
            PythonAggregateFunction = gateway.jvm \
                .org.apache.flink.table.functions.python.PythonAggregateFunction
        j_aggregate_function = PythonAggregateFunction(
            self._name,
            bytearray(serialized_func),
            j_input_types,
            j_result_type,
            j_accumulator_type,
            j_function_kind,
            self._deterministic,
            self._takes_row_as_input,
            _get_python_env())
        return j_aggregate_function
예제 #20
0
파일: table.py 프로젝트: aljoscha/flink
    def over_window(self, *over_windows):
        """
        Defines over-windows on the records of a table.

        For each record, an over-window defines an interval of records over which
        aggregation functions can be computed.

        Example:
        ::

            >>> table.over_window(Over.partition_by("c").order_by("rowTime") \\
            ...     .preceding("10.seconds").alias("ow")) \\
            ...     .select("c, b.count over ow, e.sum over ow")

        .. note::

            Computing over window aggregates on a streaming table is only a parallel
            operation if the window is partitioned. Otherwise, the whole stream will be
            processed by a single task, i.e., with parallelism 1.

        .. note::

            Over-windows for batch tables are currently not supported.

        :param over_windows: :class:`OverWindow` objects created from :class:`Over`.
        :return: A :class:`OverWindowedTable`.
        """
        j_over_windows = [window._java_over_window for window in over_windows]
        j_window_array = to_jarray(get_gateway().jvm.OverWindow, j_over_windows)
        return OverWindowedTable(self._j_table.window(j_window_array))
예제 #21
0
    def register_table_sink(self, name, field_names, field_types, table_sink):
        """
        Registers an external :class:`TableSink` with given field names and types in this
        :class:`TableEnvironment`'s catalog.
        Registered sink tables can be referenced in SQL DML statements.

        :param name: The name under which the :class:`TableSink` is registered.
        :param field_names: The field names to register with the :class:`TableSink`.
        :param field_types: The field types to register with the :class:`TableSink`;
                            each one is converted with ``_to_java_type``.
        :param table_sink: The :class:`TableSink` to register.
        """
        jvm = get_gateway().jvm
        j_names = utils.to_jarray(jvm.String, field_names)
        j_types = utils.to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
        # The wrapped Java sink, not the Python wrapper, is what gets registered.
        j_sink = table_sink._j_table_sink
        self._j_tenv.registerTableSink(name, j_names, j_types, j_sink)
    def register_table_sink(self, name, field_names, field_types, table_sink):
        """
        Registers an external :class:`TableSink` with given field names and types in this
        :class:`TableEnvironment`'s catalog.
        Registered sink tables can be referenced in SQL DML statements.

        :param name: The name under which the :class:`TableSink` is registered.
        :param field_names: The field names to register with the :class:`TableSink`.
        :param field_types: The field types to register with the :class:`TableSink`;
                            each one is converted with ``type_utils.to_java_type``.
        :param table_sink: The :class:`TableSink` to register.
        """
        jvm = get_gateway().jvm
        j_names = utils.to_jarray(jvm.String, field_names)
        j_types = utils.to_jarray(jvm.TypeInformation,
                                  list(map(type_utils.to_java_type, field_types)))
        # The wrapped Java sink, not the Python wrapper, is what gets registered.
        j_sink = table_sink._j_table_sink
        self._j_tenv.registerTableSink(name, j_names, j_types, j_sink)
예제 #23
0
def or_(predicate0: Union[bool, Expression[bool]],
        predicate1: Union[bool, Expression[bool]],
        *predicates: Union[bool, Expression[bool]]) -> Expression[bool]:
    """
    Boolean OR in three-valued logic.

    :param predicate0: The first predicate.
    :param predicate1: The second predicate.
    :param predicates: Any further predicates; passed on as a Java ``Object`` array.
    """
    j_object_class = get_gateway().jvm.Object
    j_remaining = to_jarray(j_object_class, list(map(_get_java_expression, predicates)))
    return _ternary_op("or", predicate0, predicate1, j_remaining)
예제 #24
0
def concat(first: Union[str, Expression[str]],
           *others: Union[str, Expression[str]]) -> Expression[str]:
    """
    Returns the string that results from concatenating the arguments.
    Returns NULL if any argument is NULL.

    :param first: The first string argument.
    :param others: Any further string arguments; passed on as a Java ``Object`` array.
    """
    j_object_class = get_gateway().jvm.Object
    j_others = to_jarray(j_object_class, list(map(_get_java_expression, others)))
    return _binary_op("concat", first, j_others)
예제 #25
0
 def __init__(self,
              file_path,
              field_names,
              field_types,
              field_delimiter=None,
              line_delimiter=None):
     """
     Creates a Java ``CsvRetractTableSink`` through its builder.

     :param file_path: Forwarded to ``withFilePath``.
     :param field_names: Field names of the schema; converted to a Java ``String`` array.
     :param field_types: Field types of the schema; converted with ``_to_java_type``.
     :param field_delimiter: Forwarded to ``withFieldDelimiter`` unless it is ``None``.
     :param line_delimiter: Forwarded to ``withLineDelimiter`` unless it is ``None``.
     """
     jvm = get_gateway().jvm
     j_builder = jvm.org.apache.flink.python.connector.CsvRetractTableSink.Builder()
     j_builder.withFilePath(file_path)

     j_names = to_jarray(jvm.String, field_names)
     j_types = to_jarray(jvm.TypeInformation, list(map(_to_java_type, field_types)))
     j_builder.withSchema(j_names, j_types)

     # Optional delimiters: only forwarded when explicitly given.
     optional_settings = (
         ("withFieldDelimiter", field_delimiter),
         ("withLineDelimiter", line_delimiter),
     )
     for setter_name, value in optional_settings:
         if value is not None:
             getattr(j_builder, setter_name)(value)

     super(CsvRetractTableSink, self).__init__(j_builder.build())
예제 #26
0
def row(head, *tail) -> Expression:
    """
    Creates a row of expressions.

    Example:
    ::

        >>> tab.select(row("key1", 1))

    :param head: The first element of the row.
    :param tail: Any further elements; passed on as a Java ``Object`` array.
    """
    j_object_class = get_gateway().jvm.Object
    j_tail = to_jarray(j_object_class, list(map(_get_java_expression, tail)))
    return _binary_op("row", head, j_tail)
예제 #27
0
def array(head, *tail) -> Expression:
    """
    Creates an array of literals.

    Example:
    ::

        >>> tab.select(array(1, 2, 3))

    :param head: The first element of the array.
    :param tail: Any further elements; passed on as a Java ``Object`` array.
    """
    j_object_class = get_gateway().jvm.Object
    j_tail = to_jarray(j_object_class, list(map(_get_java_expression, tail)))
    return _binary_op("array", head, j_tail)
예제 #28
0
파일: connectors.py 프로젝트: zmm1911/flink
    def sink(sql: str, type_info: RowTypeInfo, jdbc_connection_options: 'JdbcConnectionOptions',
             jdbc_execution_options: 'JdbcExecutionOptions' = None):
        """
        Create a JDBC sink.

        The statement builder is obtained by reflectively invoking
        ``JdbcBatchingOutputFormat.createRowJdbcStatementBuilder`` with the SQL types
        derived from the field types of ``type_info``.

        :param sql: arbitrary DML query (e.g. insert, update, upsert)
        :param type_info: A RowTypeInfo for query field types.
        :param jdbc_connection_options: parameters of connection, such as JDBC URL.
        :param jdbc_execution_options: parameters of execution, such as batch size and maximum
                                       retries. ``JdbcExecutionOptions.defaults()`` is used
                                       when it is ``None``.
        :return: A JdbcSink.
        :raises ValueError: If a field type of ``type_info`` is not a ``WrapperTypeInfo``.
        """
        gateway = get_gateway()
        j_type_util = gateway.jvm.org.apache.flink.connector.jdbc.utils.JdbcTypeUtil

        # Translate every field type into its SQL type, rejecting unsupported type infos.
        sql_types = []
        for field_type in type_info.get_field_types():
            if not isinstance(field_type, WrapperTypeInfo):
                raise ValueError('field_type must be WrapperTypeInfo')
            sql_types.append(
                j_type_util.typeInformationToSqlType(field_type.get_java_type_info()))
        j_sql_types = to_jarray(gateway.jvm.int, sql_types)

        # Load the output format class through the current thread's context class loader.
        j_class = gateway.jvm.Class
        j_output_format_class = j_class.forName(
            'org.apache.flink.connector.jdbc.internal.JdbcBatchingOutputFormat',
            False,
            get_gateway().jvm.Thread.currentThread().getContextClassLoader())

        # Look up createRowJdbcStatementBuilder(int[]) and make it accessible.
        j_int_array_class = to_jarray(gateway.jvm.int, []).getClass()
        j_parameter_types = to_jarray(gateway.jvm.Class, [j_int_array_class])
        j_factory_method = j_output_format_class.getDeclaredMethod(
            'createRowJdbcStatementBuilder', j_parameter_types)
        j_factory_method.setAccessible(True)
        # The receiver is None: the method is invoked without an instance.
        j_statement_builder = j_factory_method.invoke(
            None, to_jarray(gateway.jvm.Object, [j_sql_types]))

        if jdbc_execution_options is None:
            jdbc_execution_options = JdbcExecutionOptions.defaults()

        j_sink_factory = gateway.jvm.org.apache.flink.connector.jdbc.JdbcSink
        j_jdbc_sink = j_sink_factory.sink(
            sql,
            j_statement_builder,
            jdbc_execution_options._j_jdbc_execution_options,
            jdbc_connection_options._j_jdbc_connection_options)
        return JdbcSink(j_jdbc_sink=j_jdbc_sink)
예제 #29
0
 def get_function_definition(f):
     """
     Returns the Java function definition for the given user-defined function wrapper.

     Table function wrappers are wrapped into a ``TableFunctionDefinition`` that carries
     their row type; every other wrapper yields its ``java_user_defined_function()``.

     ``gateway`` is taken from the enclosing scope.
     """
     if not isinstance(f, UserDefinedTableFunctionWrapper):
         return f.java_user_defined_function()

     # TypeInference was not supported for TableFunction in the old planner. Use
     # TableFunctionDefinition to work around this issue.
     j_field_types = to_jarray(gateway.jvm.TypeInformation,
                               list(map(_to_java_type, f._result_types)))
     j_row_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(j_field_types)
     j_definition_class = gateway.jvm.org.apache.flink.table.functions.TableFunctionDefinition
     return j_definition_class('f', f.java_user_defined_function(), j_row_type)
예제 #30
0
def invoke_java_object_method(obj, method_name):
    """
    Invokes a no-argument method on a Java object via reflection.

    The method is looked up with ``getDeclaredMethod`` on the object's class and, whenever
    that lookup raises, on the superclass, until a class declares it or no superclass is
    left. ``setAccessible(True)`` is called on the method before it is invoked.

    :param obj: The Java object to invoke the method on.
    :param method_name: Name of the no-argument method to invoke.
    :return: Whatever the reflective ``invoke`` call returns.
    :raises Exception: If no class in the hierarchy declares ``method_name``.
    """
    clz = obj.getClass()
    j_method = None
    while clz is not None:
        try:
            j_method = clz.getDeclaredMethod(method_name, None)
        except Exception:
            # Only ordinary errors mean "not declared here"; KeyboardInterrupt and
            # SystemExit must propagate instead of being treated as a failed lookup.
            clz = clz.getSuperclass()
        else:
            # A lookup that returned without raising ends the search, so the loop can
            # never spin on the same class.
            break
    if j_method is None:
        raise Exception("No such method: " + method_name)
    j_method.setAccessible(True)
    return j_method.invoke(obj, to_jarray(get_gateway().jvm.Object, []))
예제 #31
0
    def alias(self, name: str, *extra_names: str) -> 'Expression[T]':
        """
        Specifies a name for an expression i.e. a field.

        Example:
        ::

            >>> tab.select(col('a').alias('b'))

        :param name: name for one field.
        :param extra_names: additional names if the expression expands to multiple fields
        """
        # The extra names are passed on as a Java String array.
        j_extra_names = to_jarray(get_gateway().jvm.String, extra_names)
        as_op = _ternary_op("as")
        return as_op(self, name, j_extra_names)
예제 #32
0
 def _create_judf(self, serialized_func, j_input_types, j_function_kind):
     """
     Builds the Java ``PythonTableFunction`` for an already serialized Python function.

     :param serialized_func: The serialized Python function; passed on as a byte array.
     :param j_input_types: The Java input types; passed through unchanged.
     :param j_function_kind: The Java function kind; passed through unchanged.
     :return: The created Java ``PythonTableFunction`` object.
     """
     jvm = get_gateway().jvm
     # The result types are combined into a single RowTypeInfo.
     j_field_types = utils.to_jarray(jvm.TypeInformation,
                                     list(map(_to_java_type, self._result_types)))
     j_row_type = jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(j_field_types)
     j_table_function_class = jvm.org.apache.flink.table.functions.python.PythonTableFunction
     return j_table_function_class(self._name,
                                   bytearray(serialized_func),
                                   j_input_types,
                                   j_row_type,
                                   j_function_kind,
                                   self._deterministic,
                                   _get_python_env())
예제 #33
0
def with_columns(head, *tails) -> Expression:
    """
    Creates an expression that selects a range of columns. It can be used wherever an array
    of expression is accepted such as function calls, projections, or groupings.

    A range can either be index-based or name-based. Indices start at 1 and boundaries are
    inclusive.

    e.g. with_columns(range_("b", "c")) or with_columns(col("*"))

    .. seealso:: :func:`~pyflink.table.expressions.range_`,
                 :func:`~pyflink.table.expressions.without_columns`

    :param head: The first column expression.
    :param tails: Any further column expressions; passed on as a Java ``Object`` array.
    """
    j_object_class = get_gateway().jvm.Object
    j_tails = to_jarray(j_object_class, list(map(_get_java_expression, tails)))
    return _binary_op("withColumns", head, j_tails)
예제 #34
0
파일: table.py 프로젝트: apache/flink
    def insert_into(self, table_path, *table_path_continued):
        """
        Writes the :class:`Table` to a :class:`TableSink` that was registered under
        the specified name. For the path resolution algorithm see
        :func:`~TableEnvironment.useDatabase`.

        Example:
        ::

            >>> tab.insert_into("print")

        :param table_path: The first part of the path of the registered :class:`TableSink`
               to which the :class:`Table` is written. Having it as a separate parameter
               ensures that at least the name of the :class:`Table` is provided.
        :param table_path_continued: The remaining part of the path of the registered
                :class:`TableSink` to which the :class:`Table` is written.
        """
        # The remaining path components are passed on as a Java String array.
        j_remaining_path = to_jarray(get_gateway().jvm.String, table_path_continued)
        self._j_table.insertInto(table_path, j_remaining_path)
    def scan(self, *table_path):
        """
        Scans a registered table and returns the resulting :class:`Table`.

        The table must be registered in the TableEnvironment, either directly as a
        TableSource or as a Table.

        Examples:

        Scanning a directly registered table
        ::

            >>> tab = t_env.scan("tableName")

        Scanning a table from a registered catalog
        ::

            >>> tab = t_env.scan("catalogName", "dbName", "tableName")

        :param table_path: The path of the table to scan.
        :throws: Exception if no table is found using the given table path.
        :return: The resulting :class:`Table`
        """
        # The path components are passed to the Java scan() as a String array.
        j_path = utils.to_jarray(get_gateway().jvm.String, table_path)
        return Table(self._j_tenv.scan(j_path))