def alias(self, alias): """ Set the preceding offset (based on time or row-count intervals) for over window. :param alias: Preceding offset relative to the current row. :return: An over window with defined preceding. """ # type: (str) -> OverWindow return OverWindow(get_method(self._java_over_window, "as")(alias))
def alias(self, alias):
    """
    Gives this over window a name that the subsequent
    :func:`~pyflink.table.OverWindowedTable.select` clause can reference.

    :param alias: Alias for this over window.
    :return: The fully defined over window.
    """
    # type: (str) -> OverWindow
    # "as" is a reserved word in Python, so resolve the Java method
    # explicitly through get_method.
    java_as = get_method(self._java_over_window, "as")
    return OverWindow(java_as(alias))
def alias(self, fields):
    """
    Renames the fields of the expression result. Use this to disambiguate
    fields before joining to operations.

    Example:
    ::

        >>> tab.alias("a, b")

    :param fields: Field list expression string.
    :return: Result table.
    """
    # "as" is a Python keyword, so the underlying Java method is fetched
    # via get_method rather than attribute access.
    j_renamed_table = get_method(self._j_table, "as")(fields)
    return Table(j_renamed_table)
def alias(self, alias):
    """
    Gives this window a name that the following
    :func:`~pyflink.table.GroupWindowedTable.group_by` and
    :func:`~pyflink.table.WindowGroupedTable.select` clauses can reference.
    The :func:`~pyflink.table.WindowGroupedTable.select` statement can
    access window properties such as window start or end time.

    :param alias: Alias for this window.
    :return: This window.
    """
    # type: (str) -> GroupWindow
    # Resolve the Java "as" method via get_method because "as" is a
    # Python keyword.
    as_method = get_method(self._java_window, "as")
    return GroupWindow(as_method(alias))
def from_origin_field(self, origin_field_name):
    """
    Specifies the origin of the previously defined field. The origin field
    is defined by a connector or format.

    E.g. field("myString", Types.STRING).from_origin_field("CSV_MY_STRING")

    .. note::

        Field names are matched by the exact name by default (case sensitive).

    :param origin_field_name: The origin field name.
    :return: This schema object.
    """
    # "from" is a Python keyword, so the Java method must be invoked via
    # py4j's get_method rather than attribute access.  Returning self keeps
    # the builder-style call chain fluent.
    self._j_schema = get_method(self._j_schema, "from")(origin_field_name)
    return self
def __init__(self, target_id, gateway_client):
    """Wrap the remote Java object and cache its mutating methods."""
    JavaObject.__init__(self, target_id, gateway_client)
    # Pre-resolve each mutator through get_method so that a same-named
    # Java field cannot shadow the method later.
    for method_name in ('add', 'clear', 'remove'):
        setattr(self, '_' + method_name, get_method(self, method_name))
def test_execute_group_window_aggregate_from_json_plan(self):
    """
    End-to-end check: a session-window aggregate using the registered
    ``my_count`` aggregate function is translated to a JSON plan and that
    plan is executed through the filesystem connector.
    """
    # create source file path
    tmp_dir = self.tempdir
    # Columns: a, b, c, rowtime.  Rows whose rowtime values are within
    # 30 minutes of each other (per a/b key) fall into the same session.
    data = [
        '1,1,2,2018-03-11 03:10:00',
        '3,3,2,2018-03-11 03:10:00',
        '2,2,1,2018-03-11 03:10:00',
        '1,1,3,2018-03-11 03:40:00',
        '1,1,8,2018-03-11 04:20:00',
        '2,2,3,2018-03-11 03:30:00'
    ]
    source_path = tmp_dir + '/test_execute_group_window_aggregate_from_json_plan.csv'
    sink_path = tmp_dir + '/test_execute_group_window_aggregate_from_json_plan'
    with open(source_path, 'w') as fd:
        for ele in data:
            fd.write(ele + '\n')
    # Watermarks lag rowtime by 60 minutes so every test row arrives
    # before its session window is closed.
    source_table = """
        CREATE TABLE source_table (
            a TINYINT,
            b SMALLINT,
            c SMALLINT,
            rowtime TIMESTAMP(3),
            WATERMARK FOR rowtime AS rowtime - INTERVAL '60' MINUTE
        ) WITH (
            'connector' = 'filesystem',
            'path' = '%s',
            'format' = 'csv'
        )
    """ % source_path
    self.t_env.execute_sql(source_table)
    self.t_env.execute_sql("""
        CREATE TABLE sink_table (
            a BIGINT,
            w_start TIMESTAMP(3),
            w_end TIMESTAMP(3),
            b BIGINT
        ) WITH (
            'connector' = 'filesystem',
            'path' = '%s',
            'format' = 'csv'
        )
    """ % sink_path)
    self.t_env.create_temporary_function("my_count", CountAggregateFunction())
    # Translate the INSERT statement into a JSON plan instead of running
    # the SQL directly; executing that plan is the behavior under test.
    json_plan = self.t_env._j_tenv.getJsonPlan(
        "INSERT INTO sink_table "
        "SELECT a, "
        "SESSION_START(rowtime, INTERVAL '30' MINUTE), "
        "SESSION_END(rowtime, INTERVAL '30' MINUTE), "
        "my_count(c) "
        "FROM source_table "
        "GROUP BY "
        "a, b, SESSION(rowtime, INTERVAL '30' MINUTE)")
    from py4j.java_gateway import get_method
    # "await" is a Python keyword, so the Java TableResult method must be
    # invoked through py4j's get_method; this blocks until the job finishes.
    get_method(self.t_env._j_tenv.executeJsonPlan(json_plan), "await")()

    import glob
    # The filesystem sink may emit multiple part files; gather all lines
    # and sort so the comparison is order-independent.
    lines = [
        line.strip() for file in glob.glob(sink_path + '/*')
        for line in open(file, 'r')
    ]
    lines.sort()
    self.assertEqual(lines, [
        '1,"2018-03-11 03:10:00","2018-03-11 04:10:00",2',
        '1,"2018-03-11 04:20:00","2018-03-11 04:50:00",1',
        '2,"2018-03-11 03:10:00","2018-03-11 04:00:00",2',
        '3,"2018-03-11 03:10:00","2018-03-11 03:40:00",1'
    ])
def testGetMethod(self):
    """get_method must reach a method even when a field hides it."""
    # This is necessary if a field hides a method...
    self.gateway = JavaGateway()
    example = self.gateway.getNewExample()
    result = get_method(example, "method1")()
    self.assertEqual(1, result)
def __init__(self, target_id, gateway_client):
    """Wrap the remote Java object and pre-resolve its ``get`` method."""
    JavaObject.__init__(self, target_id, gateway_client)
    # get_method resolves the real method even if a Java field named
    # "get" shadows it on the wrapped object.
    getter = get_method(self, "get")
    self._get = getter
def __init__(self, target_id, gateway_client):
    """Initialise the Java wrapper and cache its mutating methods."""
    JavaObject.__init__(self, target_id, gateway_client)
    # Resolve each mutator via get_method so same-named Java fields
    # cannot shadow the methods.
    self._add, self._clear, self._remove = (
        get_method(self, "add"),
        get_method(self, "clear"),
        get_method(self, "remove"),
    )
def set_field(java_object, field_name, value):
    """
    Assign ``value`` to the field ``field_name`` on ``java_object``.

    The ``<name>_$eq`` suffix matches the setter naming produced by the
    Scala compiler for mutable fields; it is invoked through py4j's
    get_method helper.
    """
    return get_method(java_object, field_name + "_$eq")(value)
def get_field(java_object, field_name):
    """
    Read field ``field_name`` from ``java_object`` by calling its
    zero-argument accessor method via py4j's get_method helper.
    """
    accessor = get_method(java_object, field_name)
    return accessor()
def test_execute_over_aggregate_from_json_plan(self):
    """
    End-to-end check: OVER-window aggregates backed by pandas UDAFs are
    compiled via ``compilePlanSql`` and executed through the filesystem
    connector.
    """
    # create source file path
    tmp_dir = self.tempdir
    # Columns: a, b, rowtime.  Each OVER window spans the previous row
    # and the current row per partition key 'a'.
    data = [
        '1,1,2013-01-01 03:10:00',
        '3,2,2013-01-01 03:10:00',
        '2,1,2013-01-01 03:10:00',
        '1,5,2013-01-01 03:10:00',
        '1,8,2013-01-01 04:20:00',
        '2,3,2013-01-01 03:30:00'
    ]
    source_path = tmp_dir + '/test_execute_over_aggregate_from_json_plan.csv'
    sink_path = tmp_dir + '/test_execute_over_aggregate_from_json_plan'
    with open(source_path, 'w') as fd:
        for ele in data:
            fd.write(ele + '\n')
    # Watermarks lag rowtime by 60 minutes so all rows arrive before
    # their event-time positions are finalized.
    source_table = """
        CREATE TABLE source_table (
            a TINYINT,
            b SMALLINT,
            rowtime TIMESTAMP(3),
            WATERMARK FOR rowtime AS rowtime - INTERVAL '60' MINUTE
        ) WITH (
            'connector' = 'filesystem',
            'path' = '%s',
            'format' = 'csv'
        )
    """ % source_path
    self.t_env.execute_sql(source_table)
    self.t_env.execute_sql("""
        CREATE TABLE sink_table (
            a TINYINT,
            b FLOAT,
            c SMALLINT
        ) WITH (
            'connector' = 'filesystem',
            'path' = '%s',
            'format' = 'csv'
        )
    """ % sink_path)
    # Pandas UDAF computing max(b) + min(b) over the window.
    max_add_min_udaf = udaf(lambda a: a.max() + a.min(),
                            result_type=DataTypes.SMALLINT(),
                            func_type='pandas')
    self.t_env.get_config().set(
        "pipeline.time-characteristic", "EventTime")
    self.t_env.create_temporary_system_function("mean_udaf", mean_udaf)
    self.t_env.create_temporary_system_function("max_add_min_udaf", max_add_min_udaf)
    # Compile the INSERT into a plan object; executing that compiled plan
    # (rather than the SQL directly) is the behavior under test.
    json_plan = self.t_env._j_tenv.compilePlanSql("""
    insert into sink_table
    select a,
           mean_udaf(b)
           over (PARTITION BY a ORDER BY rowtime
           ROWS BETWEEN 1 PRECEDING AND CURRENT ROW),
           max_add_min_udaf(b)
           over (PARTITION BY a ORDER BY rowtime
           ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)
    from source_table
    """)
    from py4j.java_gateway import get_method
    # "await" is a Python keyword, so the Java result's await() must be
    # called through get_method; this blocks until the job finishes.
    get_method(json_plan.execute(), "await")()

    import glob
    # Collect lines from all sink part files and sort for a
    # deterministic, order-independent comparison.
    lines = [line.strip() for file in glob.glob(sink_path + '/*')
             for line in open(file, 'r')]
    lines.sort()
    self.assertEqual(lines, ['1,1.0,2', '1,3.0,6', '1,6.5,13',
                             '2,1.0,2', '2,2.0,4', '3,2.0,4'])
def __init__(self, target_id, gateway_client):
    """Wrap the remote Java object and keep a handle to ``remove``."""
    JavaObject.__init__(self, target_id, gateway_client)
    # Bind "remove" through get_method so a same-named Java field cannot
    # shadow the method.
    self.java_remove = get_method(self, "remove")
def __init__(self, target_id, gateway_client):
    """Initialise the wrapper and pre-resolve the Java ``remove`` method."""
    JavaObject.__init__(self, target_id, gateway_client)
    # Keep an explicit handle so the real method stays reachable even if
    # a field named "remove" hides it on the Java side.
    remove_method = get_method(self, 'remove')
    self.java_remove = remove_method
def testGetMethod(self):
    # A Java field with the same name can hide a method; get_method
    # bypasses that and resolves the method itself.
    self.gateway = JavaGateway()
    new_example = self.gateway.getNewExample()
    method1 = get_method(new_example, 'method1')
    self.assertEqual(1, method1())
def __init__(self, target_id, gateway_client):
    """Wrap the remote Java object, caching its ``get`` method."""
    JavaObject.__init__(self, target_id, gateway_client)
    # Resolved via get_method so a Java field named "get" cannot hide
    # the method on the wrapped object.
    self._get = get_method(self, 'get')