def __init__(self, field_names, field_types, path, field_delimiter=',',
             num_files=1, write_mode=None):
    # type: (list[str], list[DataType], str, str, int, int) -> None
    gateway = get_gateway()
    if write_mode == WriteMode.NO_OVERWRITE:
        j_write_mode = gateway.jvm.scala.Option.apply(
            gateway.jvm.org.apache.flink.core.fs.FileSystem.WriteMode.NO_OVERWRITE)
    elif write_mode == WriteMode.OVERWRITE:
        j_write_mode = gateway.jvm.scala.Option.apply(
            gateway.jvm.org.apache.flink.core.fs.FileSystem.WriteMode.OVERWRITE)
    elif write_mode is None:
        j_write_mode = gateway.jvm.scala.Option.empty()
    else:
        raise Exception('Unsupported write_mode: %s' % write_mode)
    j_some_field_delimiter = gateway.jvm.scala.Option.apply(field_delimiter)
    j_some_num_files = gateway.jvm.scala.Option.apply(num_files)
    j_csv_table_sink = gateway.jvm.CsvTableSink(
        path, j_some_field_delimiter, j_some_num_files, j_write_mode)
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    j_csv_table_sink = j_csv_table_sink.configure(j_field_names, j_field_types)
    super(CsvTableSink, self).__init__(j_csv_table_sink)
def call(f: Union[str, UserDefinedFunctionWrapper], *args) -> Expression:
    """
    The first parameter `f` can be a str or a Python user-defined function.

    When it is a str, this is a call to a function that will be looked up in a
    catalog. There are two kinds of functions:

        - System functions - which are identified with one-part names
        - Catalog functions - which are always identified with three-part names
          (catalog, database, function)

    Moreover, each function can be either a temporary function or a permanent
    one (which is stored in an external catalog).

    Based on these two properties, the resolution order for looking up a
    function based on the provided `function_name` is as follows:

        - Temporary system function
        - System function
        - Temporary catalog function
        - Catalog function

    :param f: the path of the function or the Python user-defined function.
    :param args: parameters of the user-defined function.
    """
    gateway = get_gateway()
    if isinstance(f, str):
        return Expression(gateway.jvm.Expressions.call(
            f, to_jarray(gateway.jvm.Object,
                         [_get_java_expression(arg) for arg in args])))

    def get_function_definition(f):
        if isinstance(f, UserDefinedTableFunctionWrapper):
            # TypeInference was not supported for TableFunction in the old
            # planner. Use TableFunctionDefinition to work around this issue.
            j_result_types = to_jarray(gateway.jvm.TypeInformation,
                                       [_to_java_type(i) for i in f._result_types])
            j_result_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(
                j_result_types)
            return gateway.jvm.org.apache.flink.table.functions.TableFunctionDefinition(
                'f', f.java_user_defined_function(), j_result_type)
        else:
            return f.java_user_defined_function()

    expressions_clz = load_java_class("org.apache.flink.table.api.Expressions")
    function_definition_clz = load_java_class(
        'org.apache.flink.table.functions.FunctionDefinition')
    j_object_array_type = to_jarray(gateway.jvm.Object, []).getClass()

    api_call_method = expressions_clz.getDeclaredMethod(
        "apiCall",
        to_jarray(gateway.jvm.Class,
                  [function_definition_clz, j_object_array_type]))
    api_call_method.setAccessible(True)

    return Expression(api_call_method.invoke(
        None,
        to_jarray(gateway.jvm.Object,
                  [get_function_definition(f),
                   to_jarray(gateway.jvm.Object,
                             [_get_java_expression(arg) for arg in args])])))
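# Hedged usage sketch for `call` above (not part of the original file): it
# shows an inline Python UDF passed directly and a by-name lookup following
# the resolution order documented in the docstring. The environment setup,
# table, column names, and the registered name "my_upper" are illustrative
# assumptions; the public API used below matches recent PyFlink releases.
def _example_call_usage():
    from pyflink.table import EnvironmentSettings, TableEnvironment
    from pyflink.table.expressions import call, col
    from pyflink.table.types import DataTypes
    from pyflink.table.udf import udf

    t_env = TableEnvironment.create(EnvironmentSettings.in_batch_mode())
    tab = t_env.from_elements([(1, 'hi'), (2, 'hello')], ['id', 'word'])

    # An inline Python UDF passed directly to call():
    upper = udf(lambda s: s.upper(), result_type=DataTypes.STRING())
    tab.select(call(upper, col('word'))).execute().print()

    # A by-name call, resolved via the lookup order documented above:
    t_env.create_temporary_system_function("my_upper", upper)
    tab.select(call("my_upper", col('word'))).execute().print()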
def _create_judtf(self):
    func = self._func
    if not isinstance(self._func, UserDefinedFunction):
        func = DelegationTableFunction(self._func)

    import cloudpickle
    serialized_func = cloudpickle.dumps(func)

    gateway = get_gateway()
    if self._input_types is not None:
        j_input_types = utils.to_jarray(
            gateway.jvm.TypeInformation,
            [_to_java_type(i) for i in self._input_types])
    else:
        j_input_types = None
    j_result_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(i) for i in self._result_types])
    j_result_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(
        j_result_types)
    j_function_kind = gateway.jvm.org.apache.flink.table.functions.python. \
        PythonFunctionKind.GENERAL
    PythonTableFunction = gateway.jvm \
        .org.apache.flink.table.functions.python.PythonTableFunction
    j_table_function = PythonTableFunction(
        self._name,
        bytearray(serialized_func),
        j_input_types,
        j_result_type,
        j_function_kind,
        self._deterministic,
        _get_python_env())
    return j_table_function
def __init__(self, hostname=None, port=None, line_delimiter=None,
             field_delimiter=None, field_names=None, field_types=None,
             append_proctime=None):
    gateway = get_gateway()
    j_builder = gateway.jvm.org.apache.flink.python.connector.SocketTableSource.Builder()
    if hostname is not None:
        j_builder.withHostname(hostname)
    if port is not None:
        j_builder.withPort(port)
    if line_delimiter is not None:
        j_builder.withLineDelimiter(line_delimiter)
    if field_delimiter is not None:
        j_builder.withFieldDelimiter(field_delimiter)
    if field_names is not None and field_types is not None:
        j_field_names = to_jarray(gateway.jvm.String, field_names)
        j_field_types = to_jarray(
            gateway.jvm.TypeInformation,
            [_to_java_type(field_type) for field_type in field_types])
        j_builder.withSchema(j_field_names, j_field_types)
    if append_proctime is not None:
        j_builder.appendProctime(append_proctime)
    super(SocketTableSource, self).__init__(j_builder.build())
def __init__(self, field_names, field_types, path, field_delimiter=',',
             num_files=-1, write_mode=None):
    gateway = get_gateway()
    if write_mode == WriteMode.NO_OVERWRITE:
        j_write_mode = gateway.jvm.org.apache.flink.core.fs.FileSystem.WriteMode.NO_OVERWRITE
    elif write_mode == WriteMode.OVERWRITE:
        j_write_mode = gateway.jvm.org.apache.flink.core.fs.FileSystem.WriteMode.OVERWRITE
    elif write_mode is None:
        j_write_mode = None
    else:
        raise Exception('Unsupported write_mode: %s' % write_mode)
    j_csv_table_sink = gateway.jvm.CsvTableSink(
        path, field_delimiter, num_files, j_write_mode)
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    j_csv_table_sink = j_csv_table_sink.configure(j_field_names, j_field_types)
    super(CsvTableSink, self).__init__(j_csv_table_sink)
def __init__(self, j_table_sink, field_names, field_types):
    gateway = get_gateway()
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    j_table_sink = j_table_sink.configure(j_field_names, j_field_types)
    super(TestTableSink, self).__init__(j_table_sink)
def __init__(self, source_path, field_names, field_types):
    # type: (str, list[str], list[DataType]) -> None
    gateway = get_gateway()
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    super(CsvTableSource, self).__init__(
        gateway.jvm.CsvTableSource(source_path, j_field_names, j_field_types))
def __init__(self, field_names, field_types, out_row=100000):
    gateway = get_gateway()
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    j_table_sink = gateway.jvm.com.alibaba.flink.sink.PrintTableSink(
        j_field_names, j_field_types, out_row)
    super(PrintTableSink, self).__init__(j_table_sink)
def __init__(self, source_path, field_names, field_types):
    # type: (str, list[str], list[DataType]) -> None
    gateway = get_gateway()
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [type_utils.to_java_type(field_type) for field_type in field_types])
    super(CsvTableSource, self).__init__(
        gateway.jvm.CsvTableSource(source_path, j_field_names, j_field_types))
def __init__(self, field_names=None, data_types=None, j_table_schema=None):
    if j_table_schema is None:
        gateway = get_gateway()
        j_field_names = to_jarray(gateway.jvm.String, field_names)
        j_data_types = to_jarray(
            gateway.jvm.TypeInformation,
            [_to_java_type(item) for item in data_types])
        self._j_table_schema = gateway.jvm.TableSchema(j_field_names, j_data_types)
    else:
        self._j_table_schema = j_table_schema
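# Hedged usage sketch for the TableSchema constructor above (not part of the
# original file): a schema built from Python-side field names and data types,
# leaving j_table_schema as None so the Java object is created on the fly.
# The field names and types are illustrative assumptions.
def _example_table_schema_usage():
    from pyflink.table import TableSchema
    from pyflink.table.types import DataTypes

    schema = TableSchema(['id', 'word'],
                         [DataTypes.BIGINT(), DataTypes.STRING()])
    print(schema.get_field_names())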
def __init__(self, field_names, field_types):
    gateway = get_gateway()
    j_print_table_sink = gateway.jvm.org.apache.flink.python.connector.PrintTableSink()
    j_field_names = to_jarray(gateway.jvm.String, field_names)
    j_field_types = to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    j_print_table_sink = j_print_table_sink.configure(j_field_names, j_field_types)
    super(PrintTableSink, self).__init__(j_print_table_sink)
def call(f: Union[str, UserDefinedFunctionWrapper], *args) -> Expression:
    """
    The first parameter `f` can be a str or a Python user-defined function.

    When it is a str, this is a call to a function that will be looked up in a
    catalog. There are two kinds of functions:

        - System functions - which are identified with one-part names
        - Catalog functions - which are always identified with three-part names
          (catalog, database, function)

    Moreover, each function can be either a temporary function or a permanent
    one (which is stored in an external catalog).

    Based on these two properties, the resolution order for looking up a
    function based on the provided `function_name` is as follows:

        - Temporary system function
        - System function
        - Temporary catalog function
        - Catalog function

    :param f: the path of the function or the Python user-defined function.
    :param args: parameters of the user-defined function.
    """
    gateway = get_gateway()
    return Expression(gateway.jvm.Expressions.call(
        f if isinstance(f, str) else f.java_user_defined_function(),
        to_jarray(gateway.jvm.Object,
                  [_get_java_expression(arg) for arg in args])))
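# Hedged sketch complementing the example after the first `call` variant
# above: a permanent catalog function addressed by its three-part path, per
# the resolution order in the docstring. The catalog, database, function, and
# column names are illustrative assumptions.
def _example_call_by_catalog_path(tab):
    from pyflink.table.expressions import call, col
    return tab.select(call("my_catalog.my_db.my_upper", col('word')))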
def _create_judf(self):
    gateway = get_gateway()

    def get_python_function_kind(udf_type):
        JPythonFunctionKind = gateway.jvm.org.apache.flink.table.functions.python. \
            PythonFunctionKind
        if udf_type == "general":
            return JPythonFunctionKind.GENERAL
        elif udf_type == "pandas":
            return JPythonFunctionKind.PANDAS
        else:
            raise TypeError("Unsupported udf_type: %s." % udf_type)

    func = self._func
    if not isinstance(self._func, UserDefinedFunction):
        func = DelegatingScalarFunction(self._func)

    import cloudpickle
    serialized_func = cloudpickle.dumps(func)

    j_input_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(i) for i in self._input_types])
    j_result_type = _to_java_type(self._result_type)
    j_function_kind = get_python_function_kind(self._udf_type)
    PythonScalarFunction = gateway.jvm \
        .org.apache.flink.table.functions.python.PythonScalarFunction
    j_scalar_function = PythonScalarFunction(
        self._name,
        bytearray(serialized_func),
        j_input_types,
        j_result_type,
        j_function_kind,
        self._deterministic,
        _get_python_env())
    return j_scalar_function
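# Hedged sketch of how the two function kinds dispatched above surface in the
# public udf API (the keyword is `func_type` in recent PyFlink releases;
# older releases used `udf_type`). The lambdas and result types are
# illustrative assumptions.
def _example_udf_kinds():
    from pyflink.table.types import DataTypes
    from pyflink.table.udf import udf

    # A general (row-at-a-time) Python scalar function:
    plus_one = udf(lambda i: i + 1, result_type=DataTypes.BIGINT())
    # A pandas (vectorized, pandas.Series in/out) scalar function:
    pandas_plus_one = udf(lambda s: s + 1, result_type=DataTypes.BIGINT(),
                          func_type="pandas")
    return plus_one, pandas_plus_one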
def over_window(self, *over_windows):
    """
    Defines over-windows on the records of a table.

    An over-window defines for each record an interval of records over which
    aggregation functions can be computed.

    Example:
    ::

        >>> table.over_window(Over.partition_by("c").order_by("rowTime") \\
        ...     .preceding("10.seconds").alias("ow")) \\
        ...     .select("c, b.count over ow, e.sum over ow")

    .. note::

        Computing over window aggregates on a streaming table is only a
        parallel operation if the window is partitioned. Otherwise, the whole
        stream will be processed by a single task, i.e., with parallelism 1.

    .. note::

        Over-windows for batch tables are currently not supported.

    :param over_windows: over windows created from :class:`~pyflink.table.window.Over`.
    :type over_windows: pyflink.table.window.OverWindow
    :return: An over windowed table.
    :rtype: pyflink.table.OverWindowedTable
    """
    gateway = get_gateway()
    window_array = to_jarray(
        gateway.jvm.OverWindow,
        [item._java_over_window for item in over_windows])
    return OverWindowedTable(self._j_table.window(window_array), self._t_env)
def java_user_defined_function(self):
    if self._judf_placeholder is None:
        gateway = get_gateway()

        def get_python_function_kind():
            JPythonFunctionKind = gateway.jvm.org.apache.flink.table.functions.python. \
                PythonFunctionKind
            if self._func_type == "general":
                return JPythonFunctionKind.GENERAL
            elif self._func_type == "pandas":
                return JPythonFunctionKind.PANDAS
            else:
                raise TypeError("Unsupported func_type: %s." % self._func_type)

        if self._input_types is not None:
            j_input_types = utils.to_jarray(
                gateway.jvm.TypeInformation,
                [_to_java_type(i) for i in self._input_types])
        else:
            j_input_types = None
        j_function_kind = get_python_function_kind()
        func = self._func
        if not isinstance(self._func, UserDefinedFunction):
            func = self._create_delegate_function()

        import cloudpickle
        serialized_func = cloudpickle.dumps(func)
        self._judf_placeholder = \
            self._create_judf(serialized_func, j_input_types, j_function_kind)
    return self._judf_placeholder
def over_window(self, *over_windows: OverWindow) -> 'OverWindowedTable':
    """
    Defines over-windows on the records of a table.

    An over-window defines for each record an interval of records over which
    aggregation functions can be computed.

    Example:
    ::

        >>> from pyflink.table import expressions as expr
        >>> tab.over_window(Over.partition_by(tab.c).order_by(tab.rowtime) \\
        ...     .preceding(expr.lit(10).seconds).alias("ow")) \\
        ...     .select(tab.c, tab.b.count.over(expr.col('ow')),
        ...             tab.e.sum.over(expr.col('ow')))

    .. note::

        Computing over window aggregates on a streaming table is only a
        parallel operation if the window is partitioned. Otherwise, the whole
        stream will be processed by a single task, i.e., with parallelism 1.

    .. note::

        Over-windows for batch tables are currently not supported.

    :param over_windows: over windows created from :class:`~pyflink.table.window.Over`.
    :return: An over windowed table.
    """
    gateway = get_gateway()
    window_array = to_jarray(
        gateway.jvm.OverWindow,
        [item._java_over_window for item in over_windows])
    return OverWindowedTable(self._j_table.window(window_array), self._t_env)
def scan(self, *table_path):
    """
    Scans a registered table and returns the resulting :class:`Table`.

    A table to scan must be registered in the TableEnvironment. It can either
    be directly registered or be an external member of a
    :class:`pyflink.table.catalog.Catalog`.

    See the documentation of :func:`~pyflink.table.TableEnvironment.use_database` or
    :func:`~pyflink.table.TableEnvironment.use_catalog` for the rules on the path
    resolution.

    Examples:

    Scanning a directly registered table
    ::

        >>> tab = t_env.scan("tableName")

    Scanning a table from a registered catalog
    ::

        >>> tab = t_env.scan("catalogName", "dbName", "tableName")

    :param table_path: The path of the table to scan.
    :throws: Exception if no table is found using the given table path.
    :return: The resulting :class:`Table`.
    """
    gateway = get_gateway()
    j_table_paths = utils.to_jarray(gateway.jvm.String, table_path)
    j_table = self._j_tenv.scan(j_table_paths)
    return Table(j_table)
def _create_judf(self, is_blink_planner, table_config):
    func = self._func
    if not isinstance(self._func, UserDefinedFunction):
        func = DelegatingScalarFunction(self._func)

    import cloudpickle
    serialized_func = cloudpickle.dumps(func)

    gateway = get_gateway()
    j_input_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(i) for i in self._input_types])
    j_result_type = _to_java_type(self._result_type)
    if is_blink_planner:
        PythonTableUtils = gateway.jvm \
            .org.apache.flink.table.planner.utils.python.PythonTableUtils
        j_scalar_function = PythonTableUtils \
            .createPythonScalarFunction(table_config,
                                        self._name,
                                        bytearray(serialized_func),
                                        j_input_types,
                                        j_result_type,
                                        self._deterministic,
                                        _get_python_env())
    else:
        PythonTableUtils = gateway.jvm.PythonTableUtils
        j_scalar_function = PythonTableUtils \
            .createPythonScalarFunction(self._name,
                                        bytearray(serialized_func),
                                        j_input_types,
                                        j_result_type,
                                        self._deterministic,
                                        _get_python_env())
    return j_scalar_function
def _create_judf(self, serialized_func, j_input_types, j_function_kind):
    if self._func_type == "pandas":
        from pyflink.table.types import DataTypes
        self._accumulator_type = DataTypes.ARRAY(self._result_type)

    gateway = get_gateway()
    if j_input_types is not None:
        j_input_types = utils.to_jarray(
            gateway.jvm.DataType,
            [_to_java_data_type(i) for i in self._input_types])
    j_result_type = _to_java_data_type(self._result_type)
    j_accumulator_type = _to_java_data_type(self._accumulator_type)

    if self._is_table_aggregate:
        PythonAggregateFunction = gateway.jvm \
            .org.apache.flink.table.functions.python.PythonTableAggregateFunction
    else:
        PythonAggregateFunction = gateway.jvm \
            .org.apache.flink.table.functions.python.PythonAggregateFunction
    j_aggregate_function = PythonAggregateFunction(
        self._name,
        bytearray(serialized_func),
        j_input_types,
        j_result_type,
        j_accumulator_type,
        j_function_kind,
        self._deterministic,
        self._takes_row_as_input,
        _get_python_env())
    return j_aggregate_function
def over_window(self, *over_windows):
    """
    Defines over-windows on the records of a table.

    An over-window defines for each record an interval of records over which
    aggregation functions can be computed.

    Example:
    ::

        >>> table.over_window(Over.partition_by("c").order_by("rowTime") \\
        ...     .preceding("10.seconds").alias("ow")) \\
        ...     .select("c, b.count over ow, e.sum over ow")

    .. note::

        Computing over window aggregates on a streaming table is only a
        parallel operation if the window is partitioned. Otherwise, the whole
        stream will be processed by a single task, i.e., with parallelism 1.

    .. note::

        Over-windows for batch tables are currently not supported.

    :param over_windows: :class:`OverWindow` instances created from :class:`Over`.
    :return: An :class:`OverWindowedTable`.
    """
    gateway = get_gateway()
    window_array = to_jarray(
        gateway.jvm.OverWindow,
        [item._java_over_window for item in over_windows])
    return OverWindowedTable(self._j_table.window(window_array))
def register_table_sink(self, name, field_names, field_types, table_sink):
    """
    Registers an external :class:`TableSink` with the given field names and
    types in this :class:`TableEnvironment`'s catalog. Registered sink tables
    can be referenced in SQL DML statements.

    :param name: The name under which the :class:`TableSink` is registered.
    :param field_names: The field names to register with the :class:`TableSink`.
    :param field_types: The field types to register with the :class:`TableSink`.
    :param table_sink: The :class:`TableSink` to register.
    """
    gateway = get_gateway()
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    self._j_tenv.registerTableSink(name, j_field_names, j_field_types,
                                   table_sink._j_table_sink)
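# Hedged usage sketch for register_table_sink above, combined with the
# CsvTableSink variants earlier in this file (not part of the original file;
# the path, names, and types are illustrative assumptions).
def _example_register_csv_sink(t_env):
    from pyflink.table.types import DataTypes

    field_names = ['id', 'word']
    field_types = [DataTypes.BIGINT(), DataTypes.STRING()]
    sink = CsvTableSink(field_names, field_types, '/tmp/result.csv')
    t_env.register_table_sink('csv_sink', field_names, field_types, sink)
    # The registered sink can then be targeted from the Table API, e.g.:
    #     t_env.scan('source_table').insert_into('csv_sink')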
def register_table_sink(self, name, field_names, field_types, table_sink):
    """
    Registers an external :class:`TableSink` with the given field names and
    types in this :class:`TableEnvironment`'s catalog. Registered sink tables
    can be referenced in SQL DML statements.

    :param name: The name under which the :class:`TableSink` is registered.
    :param field_names: The field names to register with the :class:`TableSink`.
    :param field_types: The field types to register with the :class:`TableSink`.
    :param table_sink: The :class:`TableSink` to register.
    """
    gateway = get_gateway()
    j_field_names = utils.to_jarray(gateway.jvm.String, field_names)
    j_field_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [type_utils.to_java_type(field_type) for field_type in field_types])
    self._j_tenv.registerTableSink(name, j_field_names, j_field_types,
                                   table_sink._j_table_sink)
def or_(predicate0: Union[bool, Expression[bool]],
        predicate1: Union[bool, Expression[bool]],
        *predicates: Union[bool, Expression[bool]]) -> Expression[bool]:
    """
    Boolean OR in three-valued logic.
    """
    gateway = get_gateway()
    predicates = to_jarray(gateway.jvm.Object,
                           [_get_java_expression(p) for p in predicates])
    return _ternary_op("or", predicate0, predicate1, predicates)
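# Hedged usage sketch for or_ above (the table and column names are
# illustrative assumptions). Three-valued logic means the result can be
# TRUE, FALSE, or NULL when an operand is NULL.
def _example_or_usage(tab):
    from pyflink.table.expressions import col
    return tab.where(or_(col('a') > 10, col('b').is_null))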
def concat(first: Union[str, Expression[str]],
           *others: Union[str, Expression[str]]) -> Expression[str]:
    """
    Returns the string that results from concatenating the arguments.
    Returns NULL if any argument is NULL.
    """
    gateway = get_gateway()
    return _binary_op("concat",
                      first,
                      to_jarray(gateway.jvm.Object,
                                [_get_java_expression(other) for other in others]))
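# Hedged usage sketch for concat above (the table and column names are
# illustrative assumptions): any NULL operand makes the whole result NULL.
def _example_concat_usage(tab):
    from pyflink.table.expressions import col
    return tab.select(concat(col('first_name'), ' ', col('last_name')))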
def __init__(self, file_path, field_names, field_types, field_delimiter=None,
             line_delimiter=None):
    gateway = get_gateway()
    j_builder = gateway.jvm.org.apache.flink.python.connector.CsvRetractTableSink.Builder()
    j_builder.withFilePath(file_path)
    j_field_names = to_jarray(gateway.jvm.String, field_names)
    j_field_types = to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(field_type) for field_type in field_types])
    j_builder.withSchema(j_field_names, j_field_types)
    if field_delimiter is not None:
        j_builder.withFieldDelimiter(field_delimiter)
    if line_delimiter is not None:
        j_builder.withLineDelimiter(line_delimiter)
    super(CsvRetractTableSink, self).__init__(j_builder.build())
def row(head, *tail) -> Expression:
    """
    Creates a row of expressions.

    Example:
    ::

        >>> tab.select(row("key1", 1))
    """
    gateway = get_gateway()
    tail = to_jarray(gateway.jvm.Object, [_get_java_expression(t) for t in tail])
    return _binary_op("row", head, tail)
def array(head, *tail) -> Expression:
    """
    Creates an array of literals.

    Example:
    ::

        >>> tab.select(array(1, 2, 3))
    """
    gateway = get_gateway()
    tail = to_jarray(gateway.jvm.Object, [_get_java_expression(t) for t in tail])
    return _binary_op("array", head, tail)
def sink(sql: str,
         type_info: RowTypeInfo,
         jdbc_connection_options: 'JdbcConnectionOptions',
         jdbc_execution_options: 'JdbcExecutionOptions' = None):
    """
    Create a JDBC sink.

    :param sql: arbitrary DML query (e.g. insert, update, upsert)
    :param type_info: A RowTypeInfo for the query field types.
    :param jdbc_connection_options: parameters of connection, such as JDBC URL.
    :param jdbc_execution_options: parameters of execution, such as batch size
                                   and maximum retries.
    :return: A JdbcSink.
    """
    sql_types = []
    gateway = get_gateway()
    JJdbcTypeUtil = gateway.jvm.org.apache.flink.connector.jdbc.utils.JdbcTypeUtil
    for field_type in type_info.get_field_types():
        if isinstance(field_type, WrapperTypeInfo):
            sql_types.append(
                JJdbcTypeUtil.typeInformationToSqlType(field_type.get_java_type_info()))
        else:
            raise ValueError('field_type must be WrapperTypeInfo')
    j_sql_type = to_jarray(gateway.jvm.int, sql_types)
    output_format_clz = gateway.jvm.Class \
        .forName('org.apache.flink.connector.jdbc.internal.JdbcBatchingOutputFormat',
                 False,
                 get_gateway().jvm.Thread.currentThread().getContextClassLoader())
    j_int_array_type = to_jarray(gateway.jvm.int, []).getClass()
    j_builder_method = output_format_clz.getDeclaredMethod(
        'createRowJdbcStatementBuilder',
        to_jarray(gateway.jvm.Class, [j_int_array_type]))
    j_builder_method.setAccessible(True)
    j_statement_builder = j_builder_method.invoke(
        None, to_jarray(gateway.jvm.Object, [j_sql_type]))
    jdbc_execution_options = jdbc_execution_options if jdbc_execution_options is not None \
        else JdbcExecutionOptions.defaults()
    j_jdbc_sink = gateway.jvm.org.apache.flink.connector.jdbc.JdbcSink \
        .sink(sql,
              j_statement_builder,
              jdbc_execution_options._j_jdbc_execution_options,
              jdbc_connection_options._j_jdbc_connection_options)
    return JdbcSink(j_jdbc_sink=j_jdbc_sink)
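# Hedged sketch of wiring the sink above into a DataStream job; the SQL, URL,
# driver, and row layout are illustrative assumptions, while the builder
# calls below follow the public PyFlink JDBC connector API.
def _example_jdbc_sink(ds):
    from pyflink.common.typeinfo import Types

    type_info = Types.ROW([Types.INT(), Types.STRING()])
    ds.add_sink(JdbcSink.sink(
        "INSERT INTO people (id, name) VALUES (?, ?)",
        type_info,
        JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
            .with_url('jdbc:derby:memory:people;create=true')
            .with_driver_name('org.apache.derby.jdbc.EmbeddedDriver')
            .build(),
        JdbcExecutionOptions.builder().with_batch_size(100).build()))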
def get_function_definition(f):
    if isinstance(f, UserDefinedTableFunctionWrapper):
        # TypeInference was not supported for TableFunction in the old
        # planner. Use TableFunctionDefinition to work around this issue.
        j_result_types = to_jarray(gateway.jvm.TypeInformation,
                                   [_to_java_type(i) for i in f._result_types])
        j_result_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(
            j_result_types)
        return gateway.jvm.org.apache.flink.table.functions.TableFunctionDefinition(
            'f', f.java_user_defined_function(), j_result_type)
    else:
        return f.java_user_defined_function()
def invoke_java_object_method(obj, method_name):
    clz = obj.getClass()
    j_method = None
    while clz is not None:
        try:
            j_method = clz.getDeclaredMethod(method_name, None)
            if j_method is not None:
                break
        except:
            clz = clz.getSuperclass()
    if j_method is None:
        raise Exception("No such method: " + method_name)
    j_method.setAccessible(True)
    return j_method.invoke(obj, to_jarray(get_gateway().jvm.Object, []))
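# Hedged usage sketch for invoke_java_object_method above (the wrapped object
# is an illustrative assumption): because the lookup walks up the class
# hierarchy, even methods declared on java.lang.Object, such as toString,
# are found.
def _example_invoke_java_method(j_obj):
    # j_obj is assumed to be any py4j JavaObject, e.g. table._j_table.
    return invoke_java_object_method(j_obj, "toString")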
def alias(self, name: str, *extra_names: str) -> 'Expression[T]':
    """
    Specifies a name for an expression, i.e. a field.

    Example:
    ::

        >>> tab.select(col('a').alias('b'))

    :param name: name for one field.
    :param extra_names: additional names if the expression expands to multiple fields.
    """
    gateway = get_gateway()
    return _ternary_op("as")(self, name, to_jarray(gateway.jvm.String, extra_names))
def _create_judf(self, serialized_func, j_input_types, j_function_kind):
    gateway = get_gateway()
    j_result_types = utils.to_jarray(
        gateway.jvm.TypeInformation,
        [_to_java_type(i) for i in self._result_types])
    j_result_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(
        j_result_types)
    PythonTableFunction = gateway.jvm \
        .org.apache.flink.table.functions.python.PythonTableFunction
    j_table_function = PythonTableFunction(
        self._name,
        bytearray(serialized_func),
        j_input_types,
        j_result_type,
        j_function_kind,
        self._deterministic,
        _get_python_env())
    return j_table_function
def with_columns(head, *tails) -> Expression:
    """
    Creates an expression that selects a range of columns. It can be used
    wherever an array of expressions is accepted, such as function calls,
    projections, or groupings.

    A range can either be index-based or name-based. Indices start at 1 and
    boundaries are inclusive.

    e.g. with_columns(range_("b", "c")) or with_columns(col("*"))

    .. seealso:: :func:`~pyflink.table.expressions.range_`,
                 :func:`~pyflink.table.expressions.without_columns`
    """
    gateway = get_gateway()
    tails = to_jarray(gateway.jvm.Object, [_get_java_expression(t) for t in tails])
    return _binary_op("withColumns", head, tails)
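# Hedged usage sketch for with_columns above (the table and its column names
# are illustrative assumptions): a name-based and an index-based range.
def _example_with_columns(tab):
    from pyflink.table.expressions import range_, with_columns
    by_name = tab.select(with_columns(range_('b', 'c')))
    by_index = tab.select(with_columns(range_(1, 3)))  # columns 1..3, inclusive
    return by_name, by_index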
def insert_into(self, table_path, *table_path_continued):
    """
    Writes the :class:`Table` to a :class:`TableSink` that was registered
    under the specified name. For the path resolution algorithm see
    :func:`~TableEnvironment.use_database`.

    Example:
    ::

        >>> tab.insert_into("print")

    :param table_path: The first part of the path of the registered
                       :class:`TableSink` to which the :class:`Table` is
                       written. This is to ensure at least the name of the
                       :class:`TableSink` is provided.
    :param table_path_continued: The remaining part of the path of the
                                 registered :class:`TableSink` to which the
                                 :class:`Table` is written.
    """
    gateway = get_gateway()
    j_table_path = to_jarray(gateway.jvm.String, table_path_continued)
    self._j_table.insertInto(table_path, j_table_path)
def scan(self, *table_path):
    """
    Scans a registered table and returns the resulting :class:`Table`.

    A table to scan must be registered in the TableEnvironment. It can either
    be registered directly as a TableSource or as a Table.

    Examples:

    Scanning a directly registered table
    ::

        >>> tab = t_env.scan("tableName")

    Scanning a table from a registered catalog
    ::

        >>> tab = t_env.scan("catalogName", "dbName", "tableName")

    :param table_path: The path of the table to scan.
    :throws: Exception if no table is found using the given table path.
    :return: The resulting :class:`Table`.
    """
    gateway = get_gateway()
    j_table_paths = utils.to_jarray(gateway.jvm.String, table_path)
    j_table = self._j_tenv.scan(j_table_paths)
    return Table(j_table)