def add_or_replace_columns(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Adds additional columns. Similar to a SQL SELECT statement. The field expressions
    can contain complex expressions, but can not contain aggregations. Existing fields will
    be replaced if an added column name is the same as an existing column name. Moreover,
    if the added fields have duplicate field names, then the last one is used.

    Example:
    ::

        >>> from pyflink.table import expressions as expr
        >>> tab.add_or_replace_columns((tab.a + 1).alias('a1'),
        ...                            expr.concat(tab.b, 'sunny').alias('b1'))
        >>> tab.add_or_replace_columns("a + 1 as a1, concat(b, 'sunny') as b1")

    :param fields: Either any number of column expressions, or exactly one column list
                   string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(
            self._j_table.addOrReplaceColumns(to_expression_jarray(fields)),
            self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "add_or_replace_columns() expects either Expression arguments only or exactly "
            "one str, got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.addOrReplaceColumns(fields[0]), self._t_env)
def add_columns(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Adds additional columns. Similar to a SQL SELECT statement. The field expressions
    can contain complex expressions, but can not contain aggregations. It will throw an
    exception if the added fields already exist.

    Example:
    ::

        >>> from pyflink.table import expressions as expr
        >>> tab.add_columns((tab.a + 1).alias('a1'), expr.concat(tab.b, 'sunny').alias('b1'))
        >>> tab.add_columns("a + 1 as a1, concat(b, 'sunny') as b1")

    :param fields: Either any number of column expressions, or exactly one column list
                   string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.addColumns(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "add_columns() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.addColumns(fields[0]), self._t_env)
def select(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Performs a selection operation after an aggregate operation. The field expressions
    cannot contain table functions and aggregations.

    Example:
    ::

        >>> agg = udaf(lambda a: (a.mean(), a.max()),
        ...               result_type=DataTypes.ROW(
        ...                   [DataTypes.FIELD("a", DataTypes.FLOAT()),
        ...                    DataTypes.FIELD("b", DataTypes.INT())]),
        ...               func_type="pandas")
        >>> tab.aggregate(agg(tab.a).alias("a", "b")).select("a, b")

    :param fields: Either any number of expressions, or exactly one expression string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.select(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "select() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.select(fields[0]), self._t_env)
def order_by(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Sorts the given :class:`~pyflink.table.Table`. Similar to SQL ORDER BY.
    The resulting Table is globally sorted across all parallel partitions.

    Example:
    ::

        >>> tab.order_by(tab.name.desc)
        >>> tab.order_by("name.desc")

    For unbounded tables, this operation requires a sorting on a time attribute or a
    subsequent fetch operation.

    :param fields: Either any number of order field expressions, or exactly one order
                   fields expression string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.orderBy(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "order_by() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.orderBy(fields[0]), self._t_env)
def select(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Performs a selection operation on a over windowed table. Similar to an SQL SELECT
    statement. The field expressions can contain complex expressions and aggregations.

    Example:
    ::

        >>> over_windowed_table.select(col('c'),
        ...                            col('b').count.over(col('ow')),
        ...                            col('e').sum.over(col('ow')))
        >>> over_windowed_table.select("c, b.count over ow, e.sum over ow")

    :param fields: Either any number of expressions, or exactly one expression string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.select(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "select() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.select(fields[0]), self._t_env)
def select(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Performs a selection operation on a window grouped table. Similar to an SQL SELECT
    statement. The field expressions can contain complex expressions and aggregations.

    Example:
    ::

        >>> window_grouped_table.select(col('key'),
        ...                             col('window').start,
        ...                             col('value').avg.alias('valavg'))
        >>> window_grouped_table.select("key, window.start, value.avg as valavg")

    :param fields: Either any number of expressions, or exactly one expression string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.select(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "select() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.select(fields[0]), self._t_env)
def partition_by(cls, *partition_by: Expression) -> 'OverWindowPartitioned':
    """
    Partitions the elements on some partition keys.

    Each partition is individually sorted and aggregate functions are applied to each
    partition separately.

    :param partition_by: List of field references.
    :return: An over window with defined partitioning.
    """
    # Resolve the Java-side ``Over.partitionBy`` entry point through the gateway, convert
    # the Python expressions to a Java array, then wrap the Java result.
    j_partition_by = get_gateway().jvm.Over.partitionBy
    j_keys = to_expression_jarray(partition_by)
    j_over_window = j_partition_by(j_keys)
    return OverWindowPartitioned(j_over_window)
def partition_by(
        cls,
        *partition_by: Union[str, Expression]) -> 'OverWindowPartitioned':
    """
    Partitions the elements on some partition keys.

    Each partition is individually sorted and aggregate functions are applied to each
    partition separately.

    :param partition_by: Either any number of field reference expressions, or exactly one
                         string.
    :return: An over window with defined partitioning.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in partition_by):
        return OverWindowPartitioned(get_gateway().jvm.Over.partitionBy(
            to_expression_jarray(partition_by)))
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(partition_by) != 1 or not isinstance(partition_by[0], str):
        raise AssertionError(
            "partition_by() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in partition_by],))
    return OverWindowPartitioned(get_gateway().jvm.Over.partitionBy(partition_by[0]))
def drop_columns(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Drops existing columns. The field expressions should be field reference expressions.

    Example:
    ::

        >>> tab.drop_columns(tab.a, tab.b)
        >>> tab.drop_columns("a, b")

    :param fields: Either any number of column expressions, or exactly one column list
                   string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.dropColumns(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "drop_columns() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.dropColumns(fields[0]), self._t_env)
def select(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Performs a selection operation on a grouped table. Similar to an SQL SELECT statement.
    The field expressions can contain complex expressions and aggregations.

    Example:
    ::

        >>> tab.group_by(tab.key).select(tab.key, tab.value.avg.alias('average'))
        >>> tab.group_by("key").select("key, value.avg as average")

    :param fields: Either any number of expressions, or exactly one expression string,
                   containing group keys and aggregate function calls.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.select(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "select() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.select(fields[0]), self._t_env)
def rename_columns(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Renames existing columns. Similar to a field alias statement. The field expressions
    should be alias expressions, and only the existing fields can be renamed.

    Example:
    ::

        >>> tab.rename_columns(tab.a.alias('a1'), tab.b.alias('b1'))
        >>> tab.rename_columns("a as a1, b as b1")

    :param fields: Either any number of alias expressions, or exactly one column list
                   string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.renameColumns(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "rename_columns() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.renameColumns(fields[0]), self._t_env)
def group_by(self, *fields: Union[str, Expression]) -> 'GroupedTable':
    """
    Groups the elements on some grouping keys. Use this before a selection with
    aggregations to perform the aggregation on a per-group basis. Similar to a SQL GROUP BY
    statement.

    Example:
    ::

        >>> tab.group_by(tab.key).select(tab.key, tab.value.avg)
        >>> tab.group_by("key").select("key, value.avg")

    :param fields: Group keys, given either as any number of expressions or as exactly
                   one string.
    :return: The grouped table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return GroupedTable(
            self._j_table.groupBy(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "group_by() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return GroupedTable(self._j_table.groupBy(fields[0]), self._t_env)
def select(self, *fields: Union[str, Expression]) -> 'Table':
    """
    Performs a selection operation. Similar to a SQL SELECT statement. The field
    expressions can contain complex expressions.

    Example:
    ::

        >>> from pyflink.table import expressions as expr
        >>> tab.select(tab.key, expr.concat(tab.value, 'hello'))
        >>> tab.select(expr.col('key'), expr.concat(expr.col('value'), 'hello'))

        >>> tab.select("key, value + 'hello'")

    :param fields: Either any number of expressions, or exactly one expression string.
    :return: The result table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return Table(self._j_table.select(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "select() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return Table(self._j_table.select(fields[0]), self._t_env)
def group_by(self, *fields: Union[str, Expression]) -> 'WindowGroupedTable':
    """
    Groups the elements by a mandatory window and one or more optional grouping attributes.
    The window is specified by referring to its alias.

    If no additional grouping attribute is specified and if the input is a streaming table,
    the aggregation will be performed by a single task, i.e., with parallelism 1.

    Aggregations are performed per group and defined by a subsequent
    :func:`~pyflink.table.WindowGroupedTable.select` clause similar to SQL SELECT-GROUP-BY
    query.

    Example:
    ::

        >>> from pyflink.table import expressions as expr
        >>> tab.window(Tumble.over(expr.lit(10).minutes).on(tab.rowtime).alias('w')) \\
        ...     .group_by(col('w')) \\
        ...     .select(tab.a.sum.alias('a'),
        ...             col('w').start.alias('b'),
        ...             col('w').end.alias('c'),
        ...             col('w').rowtime.alias('d'))

    :param fields: Group keys, given either as any number of expressions or as exactly
                   one string.
    :return: A window grouped table.
    :raises AssertionError: If the arguments are not all expressions and are not exactly
                            one string either.
    """
    if all(isinstance(f, Expression) for f in fields):
        return WindowGroupedTable(
            self._j_table.groupBy(to_expression_jarray(fields)), self._t_env)
    # Raised explicitly instead of via ``assert`` so the check is not stripped under
    # ``python -O`` (which would silently ignore every argument after the first).
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(fields) != 1 or not isinstance(fields[0], str):
        raise AssertionError(
            "group_by() expects either Expression arguments only or exactly one str, "
            "got argument types: %s" % ([type(f).__name__ for f in fields],))
    return WindowGroupedTable(self._j_table.groupBy(fields[0]), self._t_env)