Example #1
0
    def alias(self, alias):
        # type: (str) -> OverWindow
        """
        Assigns an alias for this over window.

        The alias is passed to the ``as`` method of the wrapped Java over window and the
        returned Java object is wrapped in a new :class:`OverWindow`.

        :param alias: Alias for this over window.
        :return: An :class:`OverWindow` wrapping the aliased Java over window.
        """
        # "as" is a reserved word in Python, so the Java method is looked up by name.
        return OverWindow(get_method(self._java_over_window, "as")(alias))
Example #2
0
    def alias(self, alias):
        # type: (str) -> OverWindow
        """
        Assigns an alias for this window that the following
        :func:`~pyflink.table.OverWindowedTable.select` clause can refer to.

        :param alias: Alias for this over window.
        :return: The fully defined over window.
        """
        # "as" is a reserved word in Python, so the Java method is looked up by name.
        return OverWindow(get_method(self._java_over_window, "as")(alias))
Example #3
0
    def alias(self, fields):
        """
        Rename the fields of the expression result, e.g. to disambiguate field names
        before joining with another operation.

        Example:
        ::

            >>> tab.alias("a, b")

        :param fields: Field list expression string.
        :return: Result table.
        """
        # "as" is a Python keyword, so the Java method has to be fetched by name.
        rename = get_method(self._j_table, "as")
        return Table(rename(fields))
Example #4
0
    def alias(self, alias):
        # type: (str) -> GroupWindow
        """
        Assigns an alias for this window that the following
        :func:`~pyflink.table.GroupWindowedTable.group_by` and
        :func:`~pyflink.table.WindowGroupedTable.select` clause can refer to.

        The :func:`~pyflink.table.WindowGroupedTable.select` statement can access window
        properties such as window start or end time.

        :param alias: Alias for this window.
        :return: A :class:`GroupWindow` wrapping the aliased Java window.
        """
        # "as" is a reserved word in Python, so the Java method is looked up by name.
        return GroupWindow(get_method(self._java_window, "as")(alias))
Example #5
0
    def from_origin_field(self, origin_field_name):
        """
        Specify the origin of the previously defined field. The origin field is defined
        by a connector or format.

        E.g. ``field("myString", Types.STRING).from_origin_field("CSV_MY_STRING")``

        .. note::

            Field names are matched by the exact name by default (case sensitive).

        :param origin_field_name: The origin field name.
        :return: This schema object.
        """
        # "from" is a Python keyword, so the Java method has to be fetched by name.
        j_from = get_method(self._j_schema, "from")
        self._j_schema = j_from(origin_field_name)
        return self
Example #6
0
 def __init__(self, target_id, gateway_client):
     """
     Initialise the underlying ``JavaObject`` and store references to the Java
     ``add``, ``clear`` and ``remove`` members as ``_add``, ``_clear`` and ``_remove``.

     :param target_id: Forwarded unchanged to ``JavaObject.__init__``.
     :param gateway_client: Forwarded unchanged to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     # Same lookup order as the attribute names: add, clear, remove.
     for member in ('add', 'clear', 'remove'):
         setattr(self, '_' + member, get_method(self, member))
Example #7
0
    def test_execute_group_window_aggregate_from_json_plan(self):
        """
        Execute a session-window aggregation through a JSON plan and verify the sink output.

        Writes a CSV source file into ``self.tempdir``, registers filesystem source and sink
        tables, registers ``my_count``, obtains the JSON plan of the INSERT statement, executes
        it, waits for completion and compares the sorted sink lines with the expected rows.
        """
        # create source file path
        tmp_dir = self.tempdir
        data = [
            '1,1,2,2018-03-11 03:10:00', '3,3,2,2018-03-11 03:10:00',
            '2,2,1,2018-03-11 03:10:00', '1,1,3,2018-03-11 03:40:00',
            '1,1,8,2018-03-11 04:20:00', '2,2,3,2018-03-11 03:30:00'
        ]
        source_path = tmp_dir + '/test_execute_group_window_aggregate_from_json_plan.csv'
        sink_path = tmp_dir + '/test_execute_group_window_aggregate_from_json_plan'
        with open(source_path, 'w') as fd:
            for ele in data:
                fd.write(ele + '\n')

        source_table = """
            CREATE TABLE source_table (
                a TINYINT,
                b SMALLINT,
                c SMALLINT,
                rowtime TIMESTAMP(3),
                WATERMARK FOR rowtime AS rowtime - INTERVAL '60' MINUTE
            ) WITH (
                'connector' = 'filesystem',
                'path' = '%s',
                'format' = 'csv'
            )
        """ % source_path
        self.t_env.execute_sql(source_table)

        self.t_env.execute_sql("""
            CREATE TABLE sink_table (
                a BIGINT,
                w_start TIMESTAMP(3),
                w_end TIMESTAMP(3),
                b BIGINT
            ) WITH (
                'connector' = 'filesystem',
                'path' = '%s',
                'format' = 'csv'
            )
        """ % sink_path)

        self.t_env.create_temporary_function("my_count",
                                             CountAggregateFunction())

        json_plan = self.t_env._j_tenv.getJsonPlan(
            "INSERT INTO sink_table "
            "SELECT a, "
            "SESSION_START(rowtime, INTERVAL '30' MINUTE), "
            "SESSION_END(rowtime, INTERVAL '30' MINUTE), "
            "my_count(c) "
            "FROM source_table "
            "GROUP BY "
            "a, b, SESSION(rowtime, INTERVAL '30' MINUTE)")
        from py4j.java_gateway import get_method
        # "await" is a reserved word in Python 3.7+, so the Java method is looked up by name.
        get_method(self.t_env._j_tenv.executeJsonPlan(json_plan), "await")()

        import glob
        lines = []
        for sink_file_path in glob.glob(sink_path + '/*'):
            # Close each sink file deterministically instead of leaking the handle.
            with open(sink_file_path, 'r') as sink_file:
                lines.extend(line.strip() for line in sink_file)
        # Sort so the comparison does not depend on the order files are listed or written.
        lines.sort()
        self.assertEqual(lines, [
            '1,"2018-03-11 03:10:00","2018-03-11 04:10:00",2',
            '1,"2018-03-11 04:20:00","2018-03-11 04:50:00",1',
            '2,"2018-03-11 03:10:00","2018-03-11 04:00:00",2',
            '3,"2018-03-11 03:10:00","2018-03-11 03:40:00",1'
        ])
 def testGetMethod(self):
     """Check that ``get_method`` returns a callable for ``method1`` that yields 1."""
     # get_method is needed when a field hides a method of the same name.
     self.gateway = JavaGateway()
     example = self.gateway.getNewExample()
     method1 = get_method(example, "method1")
     self.assertEqual(1, method1())
Example #9
0
 def __init__(self, target_id, gateway_client):
     """
     Initialise the underlying ``JavaObject`` and keep a reference to the Java
     ``get`` member as ``_get``.

     :param target_id: Forwarded unchanged to ``JavaObject.__init__``.
     :param gateway_client: Forwarded unchanged to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     java_get = get_method(self, "get")
     self._get = java_get
Example #10
0
 def __init__(self, target_id, gateway_client):
     """
     Set up the ``JavaObject`` base and store the Java ``add``, ``clear`` and
     ``remove`` members on the instance under underscore-prefixed names.

     :param target_id: Passed through to ``JavaObject.__init__``.
     :param gateway_client: Passed through to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     # Attribute name -> Java member name, resolved in the listed order.
     for attr_name, java_name in (("_add", "add"), ("_clear", "clear"), ("_remove", "remove")):
         setattr(self, attr_name, get_method(self, java_name))
Example #11
0
def set_field(java_object, field_name, value):
    """
    Call the member named ``<field_name>_$eq`` on ``java_object`` with ``value``.

    NOTE(review): the ``_$eq`` suffix looks like a Scala-generated setter name -- confirm.

    :param java_object: Object on which the member is looked up via ``get_method``.
    :param field_name: Field name; ``"_$eq"`` is appended to form the member name.
    :param value: Argument passed to the looked-up member.
    :return: Whatever the looked-up member returns.
    """
    setter_name = field_name + "_$eq"
    return get_method(java_object, setter_name)(value)
Example #12
0
def get_field(java_object, field_name):
    """
    Look up the member ``field_name`` on ``java_object`` and call it without arguments.

    :param java_object: Object on which the member is looked up via ``get_method``.
    :param field_name: Name of the member to call.
    :return: Whatever the looked-up member returns.
    """
    return get_method(java_object, field_name)()
Example #13
0
    def test_execute_over_aggregate_from_json_plan(self):
        """
        Execute an over-window aggregation through a compiled plan and verify the sink output.

        Writes a CSV source file into ``self.tempdir``, registers filesystem source and sink
        tables, registers ``mean_udaf`` and a pandas ``max_add_min_udaf``, compiles the INSERT
        statement to a plan, executes it, waits for completion and compares the sorted sink
        lines with the expected rows.
        """
        # create source file path
        tmp_dir = self.tempdir
        data = [
            '1,1,2013-01-01 03:10:00',
            '3,2,2013-01-01 03:10:00',
            '2,1,2013-01-01 03:10:00',
            '1,5,2013-01-01 03:10:00',
            '1,8,2013-01-01 04:20:00',
            '2,3,2013-01-01 03:30:00'
        ]
        source_path = tmp_dir + '/test_execute_over_aggregate_from_json_plan.csv'
        sink_path = tmp_dir + '/test_execute_over_aggregate_from_json_plan'
        with open(source_path, 'w') as fd:
            for ele in data:
                fd.write(ele + '\n')

        source_table = """
            CREATE TABLE source_table (
                a TINYINT,
                b SMALLINT,
                rowtime TIMESTAMP(3),
                WATERMARK FOR rowtime AS rowtime - INTERVAL '60' MINUTE
            ) WITH (
                'connector' = 'filesystem',
                'path' = '%s',
                'format' = 'csv'
            )
        """ % source_path
        self.t_env.execute_sql(source_table)

        self.t_env.execute_sql("""
            CREATE TABLE sink_table (
                a TINYINT,
                b FLOAT,
                c SMALLINT
            ) WITH (
                'connector' = 'filesystem',
                'path' = '%s',
                'format' = 'csv'
            )
        """ % sink_path)

        max_add_min_udaf = udaf(lambda a: a.max() + a.min(),
                                result_type=DataTypes.SMALLINT(),
                                func_type='pandas')
        self.t_env.get_config().set(
            "pipeline.time-characteristic", "EventTime")
        self.t_env.create_temporary_system_function("mean_udaf", mean_udaf)
        self.t_env.create_temporary_system_function("max_add_min_udaf", max_add_min_udaf)

        json_plan = self.t_env._j_tenv.compilePlanSql("""
        insert into sink_table
            select a,
             mean_udaf(b)
             over (PARTITION BY a ORDER BY rowtime
             ROWS BETWEEN 1 PRECEDING AND CURRENT ROW),
             max_add_min_udaf(b)
             over (PARTITION BY a ORDER BY rowtime
             ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)
            from source_table
        """)
        from py4j.java_gateway import get_method
        # "await" is a reserved word in Python 3.7+, so the Java method is looked up by name.
        get_method(json_plan.execute(), "await")()

        import glob
        lines = []
        for sink_file_path in glob.glob(sink_path + '/*'):
            # Close each sink file deterministically instead of leaking the handle.
            with open(sink_file_path, 'r') as sink_file:
                lines.extend(line.strip() for line in sink_file)
        # Sort so the comparison does not depend on the order files are listed or written.
        lines.sort()
        self.assertEqual(lines, ['1,1.0,2', '1,3.0,6', '1,6.5,13', '2,1.0,2', '2,2.0,4', '3,2.0,4'])
 def __init__(self, target_id, gateway_client):
     """
     Initialise the ``JavaObject`` base, then store the Java ``add``, ``clear`` and
     ``remove`` members as ``_add``, ``_clear`` and ``_remove``.

     :param target_id: Handed on to ``JavaObject.__init__``.
     :param gateway_client: Handed on to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)

     def lookup(member_name):
         # Resolve one Java member of this object by name.
         return get_method(self, member_name)

     self._add = lookup("add")
     self._clear = lookup("clear")
     self._remove = lookup("remove")
Example #15
0
def get_field(java_object, field_name):
    """
    Call the zero-argument member ``field_name`` of ``java_object`` and return its result.

    :param java_object: Object on which the member is looked up via ``get_method``.
    :param field_name: Name of the member to call.
    :return: The value returned by the looked-up member.
    """
    accessor = get_method(java_object, field_name)
    field_value = accessor()
    return field_value
Example #16
0
def set_field(java_object, field_name, value):
    """
    Invoke the member ``<field_name>_$eq`` of ``java_object`` with ``value``.

    NOTE(review): ``_$eq`` presumably is the Scala setter naming scheme -- verify.

    :param java_object: Object on which the member is looked up via ``get_method``.
    :param field_name: Field name; the suffix ``"_$eq"`` is appended before lookup.
    :param value: Argument handed to the looked-up member.
    :return: The value returned by the looked-up member.
    """
    return get_method(java_object, field_name + "_$eq")(value)
Example #17
0
 def __init__(self, target_id, gateway_client):
     """
     Initialise the ``JavaObject`` base and store the Java ``add``, ``clear`` and
     ``remove`` members on the instance.

     :param target_id: Forwarded to ``JavaObject.__init__``.
     :param gateway_client: Forwarded to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     java_add = get_method(self, 'add')
     self._add = java_add
     java_clear = get_method(self, 'clear')
     self._clear = java_clear
     java_remove = get_method(self, 'remove')
     self._remove = java_remove
Example #18
0
 def __init__(self, target_id, gateway_client):
     """
     Initialise the ``JavaObject`` base and keep the Java ``remove`` member
     available as ``java_remove``.

     :param target_id: Forwarded to ``JavaObject.__init__``.
     :param gateway_client: Forwarded to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     remove_member = get_method(self, "remove")
     self.java_remove = remove_member
Example #19
0
 def __init__(self, target_id, gateway_client):
     """
     Set up the ``JavaObject`` base, then store the Java ``remove`` member
     under the attribute ``java_remove``.

     :param target_id: Passed through to ``JavaObject.__init__``.
     :param gateway_client: Passed through to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     setattr(self, 'java_remove', get_method(self, 'remove'))
Example #20
0
 def testGetMethod(self):
     """Verify that the ``method1`` member obtained through ``get_method`` returns 1."""
     # get_method is required when a field hides a method of the same name.
     self.gateway = JavaGateway()
     new_example = self.gateway.getNewExample()
     result = get_method(new_example, 'method1')()
     self.assertEqual(1, result)
Example #21
0
 def __init__(self, target_id, gateway_client):
     """
     Set up the ``JavaObject`` base, then store the Java ``get`` member
     under the attribute ``_get``.

     :param target_id: Passed through to ``JavaObject.__init__``.
     :param gateway_client: Passed through to ``JavaObject.__init__``.
     """
     JavaObject.__init__(self, target_id, gateway_client)
     setattr(self, '_get', get_method(self, 'get'))