def test_using_decorator(self): my_count = udaf(CountAggregateFunction(), accumulator_type=DataTypes.ARRAY(DataTypes.INT()), result_type=DataTypes.INT()) t = self.t_env.from_elements([(1, 'Hi', 'Hello')], ['a', 'b', 'c']) result = t.group_by(t.c) \ .select(my_count(t.a).alias("a"), t.c.alias("b")) plan = result.explain() result_type = result.get_schema().get_field_data_type(0) self.assertTrue(plan.find("PythonGroupAggregate(groupBy=[c], ") >= 0) self.assertEqual(result_type, DataTypes.INT())
def test_collect_for_all_data_types(self): expected_result = [Row(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932, bytearray(b'pyflink'), 'pyflink', datetime.date(2014, 9, 13), datetime.time(12, 0), datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [Row(['[pyflink]']), Row(['[pyflink]']), Row(['[pyflink]'])], {1: Row(['[flink]']), 2: Row(['[pyflink]'])}, decimal.Decimal('1000000000000000000.05'), decimal.Decimal( '1000000000000000000.05999999999999999899999999999'))] source = self.t_env.from_elements([(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932, bytearray(b'pyflink'), 'pyflink', datetime.date(2014, 9, 13), datetime.time(hour=12, minute=0, second=0, microsecond=123000), datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [Row(['pyflink']), Row(['pyflink']), Row(['pyflink'])], {1: Row(['flink']), 2: Row(['pyflink'])}, decimal.Decimal('1000000000000000000.05'), decimal.Decimal( '1000000000000000000.0599999999999999989' '9999999999'))], DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.BIGINT()), DataTypes.FIELD("c", DataTypes.TINYINT()), DataTypes.FIELD("d", DataTypes.BOOLEAN()), DataTypes.FIELD("e", DataTypes.SMALLINT()), DataTypes.FIELD("f", DataTypes.INT()), DataTypes.FIELD("g", DataTypes.FLOAT()), DataTypes.FIELD("h", DataTypes.DOUBLE()), DataTypes.FIELD("i", DataTypes.BYTES()), DataTypes.FIELD("j", DataTypes.STRING()), DataTypes.FIELD("k", DataTypes.DATE()), DataTypes.FIELD("l", DataTypes.TIME()), DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)), DataTypes.FIELD("n", DataTypes.ARRAY( DataTypes.ROW([DataTypes.FIELD('ss2', DataTypes.STRING())]))), DataTypes.FIELD("o", DataTypes.MAP( DataTypes.BIGINT(), DataTypes.ROW( [DataTypes.FIELD('ss', DataTypes.STRING())]))), DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18))])) table_result = source.execute() with table_result.collect() as result: collected_result = [] for i in result: collected_result.append(i) self.assertEqual(expected_result, collected_result)
def test_from_element(self): t_env = self.t_env field_names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q"] field_types = [DataTypes.BIGINT(), DataTypes.DOUBLE(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3), DataTypes.INTERVAL(DataTypes.SECOND(3)), DataTypes.ARRAY(DataTypes.DOUBLE()), DataTypes.ARRAY(DataTypes.DOUBLE(False)), DataTypes.ARRAY(DataTypes.STRING()), DataTypes.ARRAY(DataTypes.DATE()), DataTypes.DECIMAL(38, 18), DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.DOUBLE())]), DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()), DataTypes.BYTES(), PythonOnlyUDT()] schema = DataTypes.ROW( list(map(lambda field_name, field_type: DataTypes.FIELD(field_name, field_type), field_names, field_types))) table_sink = source_sink_utils.TestAppendSink(field_names, field_types) t_env.register_table_sink("Results", table_sink) t = t_env.from_elements( [(1, 1.0, "hi", "hello", datetime.date(1970, 1, 2), datetime.time(1, 0, 0), datetime.datetime(1970, 1, 2, 0, 0), datetime.timedelta(days=1, microseconds=10), [1.0, None], array.array("d", [1.0, 2.0]), ["abc"], [datetime.date(1970, 1, 2)], Decimal(1), Row("a", "b")(1, 2.0), {"key": 1.0}, bytearray(b'ABCD'), PythonOnlyPoint(3.0, 4.0))], schema) t.execute_insert("Results").wait() actual = source_sink_utils.results() expected = ['+I[1, 1.0, hi, hello, 1970-01-02, 01:00:00, 1970-01-02 00:00:00.0, ' '86400000, [1.0, null], [1.0, 2.0], [abc], [1970-01-02], ' '1.000000000000000000, +I[1, 2.0], {key=1.0}, [65, 66, 67, 68], [3.0, 4.0]]'] self.assert_equals(actual, expected)
def test_from_element(self): t_env = self.t_env a = array.array('b') a.fromstring('ABCD') t = t_env.from_elements( [(1, 1.0, "hi", "hello", datetime.date(1970, 1, 2), datetime.time(1, 0, 0), datetime.datetime(1970, 1, 2, 0, 0), [1.0, None], array.array("d", [1.0, 2.0]), ["abc"], [datetime.date(1970, 1, 2)], Decimal(1), Row("a", "b")(1, 2.0), {"key": 1.0}, a, ExamplePoint(1.0, 2.0), PythonOnlyPoint(3.0, 4.0))]) field_names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q"] field_types = [DataTypes.BIGINT(), DataTypes.DOUBLE(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(), DataTypes.ARRAY(DataTypes.DOUBLE()), DataTypes.ARRAY(DataTypes.DOUBLE(False)), DataTypes.ARRAY(DataTypes.STRING()), DataTypes.ARRAY(DataTypes.DATE()), DataTypes.DECIMAL(), DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.DOUBLE())]), DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.DOUBLE()), DataTypes.VARBINARY(), ExamplePointUDT(), PythonOnlyUDT()] t_env.register_table_sink( "Results", field_names, field_types, source_sink_utils.TestAppendSink()) t.insert_into("Results") t_env.execute() actual = source_sink_utils.results() expected = ['1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,[1.0, null],' '[1.0, 2.0],[abc],[1970-01-02],1,1,2.0,{key=1.0},[65, 66, 67, 68],[1.0, 2.0],' '[3.0, 4.0]'] self.assert_equals(actual, expected)
def test_all_data_types(self): import pandas as pd import numpy as np @udf(result_type=DataTypes.TINYINT(), func_type="pandas") def tinyint_func(tinyint_param): assert isinstance(tinyint_param, pd.Series) assert isinstance(tinyint_param[0], np.int8), \ 'tinyint_param of wrong type %s !' % type(tinyint_param[0]) return tinyint_param @udf(result_type=DataTypes.SMALLINT(), func_type="pandas") def smallint_func(smallint_param): assert isinstance(smallint_param, pd.Series) assert isinstance(smallint_param[0], np.int16), \ 'smallint_param of wrong type %s !' % type(smallint_param[0]) assert smallint_param[ 0] == 32767, 'smallint_param of wrong value %s' % smallint_param return smallint_param @udf(result_type=DataTypes.INT(), func_type="pandas") def int_func(int_param): assert isinstance(int_param, pd.Series) assert isinstance(int_param[0], np.int32), \ 'int_param of wrong type %s !' % type(int_param[0]) assert int_param[ 0] == -2147483648, 'int_param of wrong value %s' % int_param return int_param @udf(result_type=DataTypes.BIGINT(), func_type="pandas") def bigint_func(bigint_param): assert isinstance(bigint_param, pd.Series) assert isinstance(bigint_param[0], np.int64), \ 'bigint_param of wrong type %s !' % type(bigint_param[0]) return bigint_param @udf(result_type=DataTypes.BOOLEAN(), func_type="pandas") def boolean_func(boolean_param): assert isinstance(boolean_param, pd.Series) assert isinstance(boolean_param[0], np.bool_), \ 'boolean_param of wrong type %s !' % type(boolean_param[0]) return boolean_param @udf(result_type=DataTypes.FLOAT(), func_type="pandas") def float_func(float_param): assert isinstance(float_param, pd.Series) assert isinstance(float_param[0], np.float32), \ 'float_param of wrong type %s !' % type(float_param[0]) return float_param @udf(result_type=DataTypes.DOUBLE(), func_type="pandas") def double_func(double_param): assert isinstance(double_param, pd.Series) assert isinstance(double_param[0], np.float64), \ 'double_param of wrong type %s !' % type(double_param[0]) return double_param @udf(result_type=DataTypes.STRING(), func_type="pandas") def varchar_func(varchar_param): assert isinstance(varchar_param, pd.Series) assert isinstance(varchar_param[0], str), \ 'varchar_param of wrong type %s !' % type(varchar_param[0]) return varchar_param @udf(result_type=DataTypes.BYTES(), func_type="pandas") def varbinary_func(varbinary_param): assert isinstance(varbinary_param, pd.Series) assert isinstance(varbinary_param[0], bytes), \ 'varbinary_param of wrong type %s !' % type(varbinary_param[0]) return varbinary_param @udf(result_type=DataTypes.DECIMAL(38, 18), func_type="pandas") def decimal_func(decimal_param): assert isinstance(decimal_param, pd.Series) assert isinstance(decimal_param[0], decimal.Decimal), \ 'decimal_param of wrong type %s !' % type(decimal_param[0]) return decimal_param @udf(result_type=DataTypes.DATE(), func_type="pandas") def date_func(date_param): assert isinstance(date_param, pd.Series) assert isinstance(date_param[0], datetime.date), \ 'date_param of wrong type %s !' % type(date_param[0]) return date_param @udf(result_type=DataTypes.TIME(), func_type="pandas") def time_func(time_param): assert isinstance(time_param, pd.Series) assert isinstance(time_param[0], datetime.time), \ 'time_param of wrong type %s !' % type(time_param[0]) return time_param timestamp_value = datetime.datetime(1970, 1, 2, 0, 0, 0, 123000) @udf(result_type=DataTypes.TIMESTAMP(3), func_type="pandas") def timestamp_func(timestamp_param): assert isinstance(timestamp_param, pd.Series) assert isinstance(timestamp_param[0], datetime.datetime), \ 'timestamp_param of wrong type %s !' % type(timestamp_param[0]) assert timestamp_param[0] == timestamp_value, \ 'timestamp_param is wrong value %s, should be %s!' % (timestamp_param[0], timestamp_value) return timestamp_param def array_func(array_param): assert isinstance(array_param, pd.Series) assert isinstance(array_param[0], np.ndarray), \ 'array_param of wrong type %s !' % type(array_param[0]) return array_param array_str_func = udf(array_func, result_type=DataTypes.ARRAY(DataTypes.STRING()), func_type="pandas") array_timestamp_func = udf(array_func, result_type=DataTypes.ARRAY( DataTypes.TIMESTAMP(3)), func_type="pandas") array_int_func = udf(array_func, result_type=DataTypes.ARRAY(DataTypes.INT()), func_type="pandas") @udf(result_type=DataTypes.ARRAY(DataTypes.STRING()), func_type="pandas") def nested_array_func(nested_array_param): assert isinstance(nested_array_param, pd.Series) assert isinstance(nested_array_param[0], np.ndarray), \ 'nested_array_param of wrong type %s !' % type(nested_array_param[0]) return pd.Series(nested_array_param[0]) row_type = DataTypes.ROW([ DataTypes.FIELD("f1", DataTypes.INT()), DataTypes.FIELD("f2", DataTypes.STRING()), DataTypes.FIELD("f3", DataTypes.TIMESTAMP(3)), DataTypes.FIELD("f4", DataTypes.ARRAY(DataTypes.INT())) ]) @udf(result_type=row_type, func_type="pandas") def row_func(row_param): assert isinstance(row_param, pd.DataFrame) assert isinstance(row_param.f1, pd.Series) assert isinstance(row_param.f1[0], np.int32), \ 'row_param.f1 of wrong type %s !' % type(row_param.f1[0]) assert isinstance(row_param.f2, pd.Series) assert isinstance(row_param.f2[0], str), \ 'row_param.f2 of wrong type %s !' % type(row_param.f2[0]) assert isinstance(row_param.f3, pd.Series) assert isinstance(row_param.f3[0], datetime.datetime), \ 'row_param.f3 of wrong type %s !' % type(row_param.f3[0]) assert isinstance(row_param.f4, pd.Series) assert isinstance(row_param.f4[0], np.ndarray), \ 'row_param.f4 of wrong type %s !' % type(row_param.f4[0]) return row_param table_sink = source_sink_utils.TestAppendSink([ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u' ], [ DataTypes.TINYINT(), DataTypes.SMALLINT(), DataTypes.INT(), DataTypes.BIGINT(), DataTypes.BOOLEAN(), DataTypes.BOOLEAN(), DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.BYTES(), DataTypes.DECIMAL(38, 18), DataTypes.DECIMAL(38, 18), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3), DataTypes.ARRAY(DataTypes.STRING()), DataTypes.ARRAY(DataTypes.TIMESTAMP(3)), DataTypes.ARRAY(DataTypes.INT()), DataTypes.ARRAY(DataTypes.STRING()), row_type ]) self.t_env.register_table_sink("Results", table_sink) t = self.t_env.from_elements( [(1, 32767, -2147483648, 1, True, False, 1.0, 1.0, 'hello', '中文', bytearray(b'flink'), decimal.Decimal('1000000000000000000.05'), decimal.Decimal( '1000000000000000000.05999999999999999899999999999'), datetime.date(2014, 9, 13), datetime.time(hour=1, minute=0, second=1), timestamp_value, ['hello', '中文', None], [timestamp_value], [1, 2], [[ 'hello', '中文', None ]], Row(1, 'hello', timestamp_value, [1, 2]))], DataTypes.ROW([ DataTypes.FIELD("a", DataTypes.TINYINT()), DataTypes.FIELD("b", DataTypes.SMALLINT()), DataTypes.FIELD("c", DataTypes.INT()), DataTypes.FIELD("d", DataTypes.BIGINT()), DataTypes.FIELD("e", DataTypes.BOOLEAN()), DataTypes.FIELD("f", DataTypes.BOOLEAN()), DataTypes.FIELD("g", DataTypes.FLOAT()), DataTypes.FIELD("h", DataTypes.DOUBLE()), DataTypes.FIELD("i", DataTypes.STRING()), DataTypes.FIELD("j", DataTypes.STRING()), DataTypes.FIELD("k", DataTypes.BYTES()), DataTypes.FIELD("l", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("m", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("n", DataTypes.DATE()), DataTypes.FIELD("o", DataTypes.TIME()), DataTypes.FIELD("p", DataTypes.TIMESTAMP(3)), DataTypes.FIELD("q", DataTypes.ARRAY(DataTypes.STRING())), DataTypes.FIELD("r", DataTypes.ARRAY(DataTypes.TIMESTAMP(3))), DataTypes.FIELD("s", DataTypes.ARRAY(DataTypes.INT())), DataTypes.FIELD( "t", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.STRING()))), DataTypes.FIELD("u", row_type) ])) t.select( tinyint_func(t.a), smallint_func(t.b), int_func(t.c), bigint_func(t.d), boolean_func(t.e), boolean_func(t.f), float_func(t.g), double_func(t.h), varchar_func(t.i), varchar_func(t.j), varbinary_func(t.k), decimal_func(t.l), decimal_func(t.m), date_func(t.n), time_func(t.o), timestamp_func(t.p), array_str_func(t.q), array_timestamp_func(t.r), array_int_func(t.s), nested_array_func(t.t), row_func(t.u)) \ .execute_insert("Results").wait() actual = source_sink_utils.results() self.assert_equals(actual, [ "+I[1, 32767, -2147483648, 1, true, false, 1.0, 1.0, hello, 中文, " "[102, 108, 105, 110, 107], 1000000000000000000.050000000000000000, " "1000000000000000000.059999999999999999, 2014-09-13, 01:00:01, " "1970-01-02 00:00:00.123, [hello, 中文, null], [1970-01-02 00:00:00.123], " "[1, 2], [hello, 中文, null], +I[1, hello, 1970-01-02 00:00:00.123, [1, 2]]]" ])
def test_all_data_types(self): def boolean_func(bool_param): assert isinstance(bool_param, bool), 'bool_param of wrong type %s !' \ % type(bool_param) return bool_param def tinyint_func(tinyint_param): assert isinstance(tinyint_param, int), 'tinyint_param of wrong type %s !' \ % type(tinyint_param) return tinyint_param def smallint_func(smallint_param): assert isinstance(smallint_param, int), 'smallint_param of wrong type %s !' \ % type(smallint_param) assert smallint_param == 32767, 'smallint_param of wrong value %s' % smallint_param return smallint_param def int_func(int_param): assert isinstance(int_param, int), 'int_param of wrong type %s !' \ % type(int_param) assert int_param == -2147483648, 'int_param of wrong value %s' % int_param return int_param def bigint_func(bigint_param): assert isinstance(bigint_param, int), 'bigint_param of wrong type %s !' \ % type(bigint_param) return bigint_param def bigint_func_none(bigint_param): assert bigint_param is None, 'bigint_param %s should be None!' % bigint_param return bigint_param def float_func(float_param): assert isinstance(float_param, float) and float_equal(float_param, 1.23, 1e-6), \ 'float_param is wrong value %s !' % float_param return float_param def double_func(double_param): assert isinstance(double_param, float) and float_equal(double_param, 1.98932, 1e-7), \ 'double_param is wrong value %s !' % double_param return double_param def bytes_func(bytes_param): assert bytes_param == b'flink', \ 'bytes_param is wrong value %s !' % bytes_param return bytes_param def str_func(str_param): assert str_param == 'pyflink', \ 'str_param is wrong value %s !' % str_param return str_param def date_func(date_param): from datetime import date assert date_param == date(year=2014, month=9, day=13), \ 'date_param is wrong value %s !' % date_param return date_param def time_func(time_param): from datetime import time assert time_param == time(hour=12, minute=0, second=0, microsecond=123000), \ 'time_param is wrong value %s !' % time_param return time_param def timestamp_func(timestamp_param): from datetime import datetime assert timestamp_param == datetime(2018, 3, 11, 3, 0, 0, 123000), \ 'timestamp_param is wrong value %s !' % timestamp_param return timestamp_param def array_func(array_param): assert array_param == [[1, 2, 3]], \ 'array_param is wrong value %s !' % array_param return array_param[0] def map_func(map_param): assert map_param == {1: 'flink', 2: 'pyflink'}, \ 'map_param is wrong value %s !' % map_param return map_param def decimal_func(decimal_param): from decimal import Decimal assert decimal_param == Decimal('1000000000000000000.050000000000000000'), \ 'decimal_param is wrong value %s !' % decimal_param return decimal_param def decimal_cut_func(decimal_param): from decimal import Decimal assert decimal_param == Decimal('1000000000000000000.059999999999999999'), \ 'decimal_param is wrong value %s !' % decimal_param return decimal_param self.t_env.create_temporary_system_function( "boolean_func", udf(boolean_func, result_type=DataTypes.BOOLEAN())) self.t_env.create_temporary_system_function( "tinyint_func", udf(tinyint_func, result_type=DataTypes.TINYINT())) self.t_env.create_temporary_system_function( "smallint_func", udf(smallint_func, result_type=DataTypes.SMALLINT())) self.t_env.create_temporary_system_function( "int_func", udf(int_func, result_type=DataTypes.INT())) self.t_env.create_temporary_system_function( "bigint_func", udf(bigint_func, result_type=DataTypes.BIGINT())) self.t_env.create_temporary_system_function( "bigint_func_none", udf(bigint_func_none, result_type=DataTypes.BIGINT())) self.t_env.create_temporary_system_function( "float_func", udf(float_func, result_type=DataTypes.FLOAT())) self.t_env.create_temporary_system_function( "double_func", udf(double_func, result_type=DataTypes.DOUBLE())) self.t_env.create_temporary_system_function( "bytes_func", udf(bytes_func, result_type=DataTypes.BYTES())) self.t_env.create_temporary_system_function( "str_func", udf(str_func, result_type=DataTypes.STRING())) self.t_env.create_temporary_system_function( "date_func", udf(date_func, result_type=DataTypes.DATE())) self.t_env.create_temporary_system_function( "time_func", udf(time_func, result_type=DataTypes.TIME())) self.t_env.create_temporary_system_function( "timestamp_func", udf(timestamp_func, result_type=DataTypes.TIMESTAMP(3))) self.t_env.create_temporary_system_function( "array_func", udf(array_func, result_type=DataTypes.ARRAY(DataTypes.BIGINT()))) self.t_env.create_temporary_system_function( "map_func", udf(map_func, result_type=DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING()))) self.t_env.register_function( "decimal_func", udf(decimal_func, result_type=DataTypes.DECIMAL(38, 18))) self.t_env.register_function( "decimal_cut_func", udf(decimal_cut_func, result_type=DataTypes.DECIMAL(38, 18))) table_sink = source_sink_utils.TestAppendSink([ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q' ], [ DataTypes.BIGINT(), DataTypes.BIGINT(), DataTypes.TINYINT(), DataTypes.BOOLEAN(), DataTypes.SMALLINT(), DataTypes.INT(), DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.BYTES(), DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3), DataTypes.ARRAY(DataTypes.BIGINT()), DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING()), DataTypes.DECIMAL(38, 18), DataTypes.DECIMAL(38, 18) ]) self.t_env.register_table_sink("Results", table_sink) import datetime import decimal t = self.t_env.from_elements( [(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932, bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13), datetime.time(hour=12, minute=0, second=0, microsecond=123000), datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [[1, 2, 3]], { 1: 'flink', 2: 'pyflink' }, decimal.Decimal('1000000000000000000.05'), decimal.Decimal( '1000000000000000000.05999999999999999899999999999'))], DataTypes.ROW([ DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.BIGINT()), DataTypes.FIELD("c", DataTypes.TINYINT()), DataTypes.FIELD("d", DataTypes.BOOLEAN()), DataTypes.FIELD("e", DataTypes.SMALLINT()), DataTypes.FIELD("f", DataTypes.INT()), DataTypes.FIELD("g", DataTypes.FLOAT()), DataTypes.FIELD("h", DataTypes.DOUBLE()), DataTypes.FIELD("i", DataTypes.BYTES()), DataTypes.FIELD("j", DataTypes.STRING()), DataTypes.FIELD("k", DataTypes.DATE()), DataTypes.FIELD("l", DataTypes.TIME()), DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)), DataTypes.FIELD( "n", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.BIGINT()))), DataTypes.FIELD( "o", DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING())), DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18)) ])) exec_insert_table( t.select("bigint_func(a), bigint_func_none(b)," "tinyint_func(c), boolean_func(d)," "smallint_func(e),int_func(f)," "float_func(g),double_func(h)," "bytes_func(i),str_func(j)," "date_func(k),time_func(l)," "timestamp_func(m),array_func(n)," "map_func(o),decimal_func(p)," "decimal_cut_func(q)"), "Results") actual = source_sink_utils.results() # Currently the sink result precision of DataTypes.TIME(precision) only supports 0. self.assert_equals(actual, [ "1,null,1,true,32767,-2147483648,1.23,1.98932," "[102, 108, 105, 110, 107],pyflink,2014-09-13," "12:00:00,2018-03-11 03:00:00.123,[1, 2, 3]," "{1=flink, 2=pyflink},1000000000000000000.050000000000000000," "1000000000000000000.059999999999999999" ])
def get_accumulator_type(self): return DataTypes.ARRAY(DataTypes.BIGINT())
def get_accumulator_type(self): return DataTypes.ROW([ DataTypes.FIELD("f0", DataTypes.ARRAY(DataTypes.STRING())), DataTypes.FIELD("f1", DataTypes.BIGINT()) ])
from pyflink.table.udf import udf env = StreamExecutionEnvironment.get_execution_environment() t_env = StreamTableEnvironment.create( env, environment_settings=EnvironmentSettings.new_instance().use_blink_planner().build()) t_env.connect(ChartConnector())\ .with_schema(Schema() .field("city", DataTypes.STRING()) .field("sales_volume", DataTypes.BIGINT()) .field("sales", DataTypes.BIGINT()))\ .register_table_sink("sink") @udf(input_types=[DataTypes.STRING()], result_type=DataTypes.ARRAY(DataTypes.STRING())) def split(line): return line.split(",") @udf(input_types=[DataTypes.ARRAY(DataTypes.STRING()), DataTypes.INT()], result_type=DataTypes.STRING()) def get(array, index): return array[index] t_env.get_config().set_python_executable("python3") t_env.register_function("split", split) t_env.register_function("get", get) t_env.from_table_source(SocketTableSource(port=6666))\ .alias("line")\
def test_all_data_types(self): import pandas as pd import numpy as np def tinyint_func(tinyint_param): assert isinstance(tinyint_param, pd.Series) assert isinstance(tinyint_param[0], np.int8), \ 'tinyint_param of wrong type %s !' % type(tinyint_param[0]) return tinyint_param def smallint_func(smallint_param): assert isinstance(smallint_param, pd.Series) assert isinstance(smallint_param[0], np.int16), \ 'smallint_param of wrong type %s !' % type(smallint_param[0]) assert smallint_param[ 0] == 32767, 'smallint_param of wrong value %s' % smallint_param return smallint_param def int_func(int_param): assert isinstance(int_param, pd.Series) assert isinstance(int_param[0], np.int32), \ 'int_param of wrong type %s !' % type(int_param[0]) assert int_param[ 0] == -2147483648, 'int_param of wrong value %s' % int_param return int_param def bigint_func(bigint_param): assert isinstance(bigint_param, pd.Series) assert isinstance(bigint_param[0], np.int64), \ 'bigint_param of wrong type %s !' % type(bigint_param[0]) return bigint_param def boolean_func(boolean_param): assert isinstance(boolean_param, pd.Series) assert isinstance(boolean_param[0], np.bool_), \ 'boolean_param of wrong type %s !' % type(boolean_param[0]) return boolean_param def float_func(float_param): assert isinstance(float_param, pd.Series) assert isinstance(float_param[0], np.float32), \ 'float_param of wrong type %s !' % type(float_param[0]) return float_param def double_func(double_param): assert isinstance(double_param, pd.Series) assert isinstance(double_param[0], np.float64), \ 'double_param of wrong type %s !' % type(double_param[0]) return double_param def varchar_func(varchar_param): assert isinstance(varchar_param, pd.Series) assert isinstance(varchar_param[0], str), \ 'varchar_param of wrong type %s !' % type(varchar_param[0]) return varchar_param def varbinary_func(varbinary_param): assert isinstance(varbinary_param, pd.Series) assert isinstance(varbinary_param[0], bytes), \ 'varbinary_param of wrong type %s !' % type(varbinary_param[0]) return varbinary_param def decimal_func(decimal_param): assert isinstance(decimal_param, pd.Series) assert isinstance(decimal_param[0], decimal.Decimal), \ 'decimal_param of wrong type %s !' % type(decimal_param[0]) return decimal_param def date_func(date_param): assert isinstance(date_param, pd.Series) assert isinstance(date_param[0], datetime.date), \ 'date_param of wrong type %s !' % type(date_param[0]) return date_param def time_func(time_param): assert isinstance(time_param, pd.Series) assert isinstance(time_param[0], datetime.time), \ 'time_param of wrong type %s !' % type(time_param[0]) return time_param timestamp_value = datetime.datetime(1970, 1, 1, 0, 0, 0, 123000) def timestamp_func(timestamp_param): assert isinstance(timestamp_param, pd.Series) assert isinstance(timestamp_param[0], datetime.datetime), \ 'timestamp_param of wrong type %s !' % type(timestamp_param[0]) assert timestamp_param[0] == timestamp_value, \ 'timestamp_param is wrong value %s, should be %s!' % (timestamp_param[0], timestamp_value) return timestamp_param def array_func(array_param): assert isinstance(array_param, pd.Series) assert isinstance(array_param[0], np.ndarray), \ 'array_param of wrong type %s !' % type(array_param[0]) return array_param def nested_array_func(nested_array_param): assert isinstance(nested_array_param, pd.Series) assert isinstance(nested_array_param[0], np.ndarray), \ 'nested_array_param of wrong type %s !' % type(nested_array_param[0]) return pd.Series(nested_array_param[0]) self.t_env.register_function( "tinyint_func", udf(tinyint_func, [DataTypes.TINYINT()], DataTypes.TINYINT(), udf_type="pandas")) self.t_env.register_function( "smallint_func", udf(smallint_func, [DataTypes.SMALLINT()], DataTypes.SMALLINT(), udf_type="pandas")) self.t_env.register_function( "int_func", udf(int_func, [DataTypes.INT()], DataTypes.INT(), udf_type="pandas")) self.t_env.register_function( "bigint_func", udf(bigint_func, [DataTypes.BIGINT()], DataTypes.BIGINT(), udf_type="pandas")) self.t_env.register_function( "boolean_func", udf(boolean_func, [DataTypes.BOOLEAN()], DataTypes.BOOLEAN(), udf_type="pandas")) self.t_env.register_function( "float_func", udf(float_func, [DataTypes.FLOAT()], DataTypes.FLOAT(), udf_type="pandas")) self.t_env.register_function( "double_func", udf(double_func, [DataTypes.DOUBLE()], DataTypes.DOUBLE(), udf_type="pandas")) self.t_env.register_function( "varchar_func", udf(varchar_func, [DataTypes.STRING()], DataTypes.STRING(), udf_type="pandas")) self.t_env.register_function( "varbinary_func", udf(varbinary_func, [DataTypes.BYTES()], DataTypes.BYTES(), udf_type="pandas")) self.t_env.register_function( "decimal_func", udf(decimal_func, [DataTypes.DECIMAL(38, 18)], DataTypes.DECIMAL(38, 18), udf_type="pandas")) self.t_env.register_function( "date_func", udf(date_func, [DataTypes.DATE()], DataTypes.DATE(), udf_type="pandas")) self.t_env.register_function( "time_func", udf(time_func, [DataTypes.TIME()], DataTypes.TIME(), udf_type="pandas")) self.t_env.register_function( "timestamp_func", udf(timestamp_func, [DataTypes.TIMESTAMP(3)], DataTypes.TIMESTAMP(3), udf_type="pandas")) self.t_env.register_function( "array_str_func", udf(array_func, [DataTypes.ARRAY(DataTypes.STRING())], DataTypes.ARRAY(DataTypes.STRING()), udf_type="pandas")) self.t_env.register_function( "array_timestamp_func", udf(array_func, [DataTypes.ARRAY(DataTypes.TIMESTAMP(3))], DataTypes.ARRAY(DataTypes.TIMESTAMP(3)), udf_type="pandas")) self.t_env.register_function( "array_int_func", udf(array_func, [DataTypes.ARRAY(DataTypes.INT())], DataTypes.ARRAY(DataTypes.INT()), udf_type="pandas")) self.t_env.register_function( "nested_array_func", udf(nested_array_func, [DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.STRING()))], DataTypes.ARRAY(DataTypes.STRING()), udf_type="pandas")) table_sink = source_sink_utils.TestAppendSink([ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't' ], [ DataTypes.TINYINT(), DataTypes.SMALLINT(), DataTypes.INT(), DataTypes.BIGINT(), DataTypes.BOOLEAN(), DataTypes.BOOLEAN(), DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.BYTES(), DataTypes.DECIMAL(38, 18), DataTypes.DECIMAL(38, 18), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3), DataTypes.ARRAY(DataTypes.STRING()), DataTypes.ARRAY(DataTypes.TIMESTAMP(3)), DataTypes.ARRAY(DataTypes.INT()), DataTypes.ARRAY(DataTypes.STRING()) ]) self.t_env.register_table_sink("Results", table_sink) t = self.t_env.from_elements( [(1, 32767, -2147483648, 1, True, False, 1.0, 1.0, 'hello', '中文', bytearray(b'flink'), decimal.Decimal('1000000000000000000.05'), decimal.Decimal( '1000000000000000000.05999999999999999899999999999'), datetime.date(2014, 9, 13), datetime.time(hour=1, minute=0, second=1), timestamp_value, ['hello', '中文', None], [timestamp_value ], [1, 2], [['hello', '中文', None]])], DataTypes.ROW([ DataTypes.FIELD("a", DataTypes.TINYINT()), DataTypes.FIELD("b", DataTypes.SMALLINT()), DataTypes.FIELD("c", DataTypes.INT()), DataTypes.FIELD("d", DataTypes.BIGINT()), DataTypes.FIELD("e", DataTypes.BOOLEAN()), DataTypes.FIELD("f", DataTypes.BOOLEAN()), DataTypes.FIELD("g", DataTypes.FLOAT()), DataTypes.FIELD("h", DataTypes.DOUBLE()), DataTypes.FIELD("i", DataTypes.STRING()), DataTypes.FIELD("j", DataTypes.STRING()), DataTypes.FIELD("k", DataTypes.BYTES()), DataTypes.FIELD("l", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("m", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("n", DataTypes.DATE()), DataTypes.FIELD("o", DataTypes.TIME()), DataTypes.FIELD("p", DataTypes.TIMESTAMP(3)), DataTypes.FIELD("q", DataTypes.ARRAY(DataTypes.STRING())), DataTypes.FIELD("r", DataTypes.ARRAY(DataTypes.TIMESTAMP(3))), DataTypes.FIELD("s", DataTypes.ARRAY(DataTypes.INT())), DataTypes.FIELD( "t", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.STRING()))) ])) t.select("tinyint_func(a)," "smallint_func(b)," "int_func(c)," "bigint_func(d)," "boolean_func(e)," "boolean_func(f)," "float_func(g)," "double_func(h)," "varchar_func(i)," "varchar_func(j)," "varbinary_func(k)," "decimal_func(l)," "decimal_func(m)," "date_func(n)," "time_func(o)," "timestamp_func(p)," "array_str_func(q)," "array_timestamp_func(r)," "array_int_func(s)," "nested_array_func(t)") \ .insert_into("Results") self.t_env.execute("test") actual = source_sink_utils.results() self.assert_equals(actual, [ "1,32767,-2147483648,1,true,false,1.0,1.0,hello,中文," "[102, 108, 105, 110, 107],1000000000000000000.050000000000000000," "1000000000000000000.059999999999999999,2014-09-13,01:00:01," "1970-01-01 00:00:00.123,[hello, 中文, null],[1970-01-01 00:00:00.123]," "[1, 2],[hello, 中文, null]" ])
from pyflink.table import EnvironmentSettings, BatchTableEnvironment, DataTypes from pyflink.table.udf import udaf env_settings = EnvironmentSettings.new_instance().in_batch_mode( ).use_blink_planner().build() t_env = BatchTableEnvironment.create(environment_settings=env_settings) # Oct, Nov, Dez 2020 + full year 2021 FORCAST_DURATION_HOURS = (31 + 30 + 31 + 365) * 24 @udaf(input_types=[DataTypes.FLOAT(), DataTypes.FLOAT(), DataTypes.FLOAT()], result_type=DataTypes.ARRAY(DataTypes.VARCHAR(100)), func_type='pandas') def predict(ts_float, throughput, capacity): import pandas as pd import json import logging from fbprophet import Prophet logging.info('### Predictor started!') ds = pd.to_datetime(ts_float, unit="s") df_input = pd.DataFrame({'ds': ds, 'y': throughput}) input_size = len(df_input.index) # Füge Wochenend-Feature hinzu def is_saturday(ds): date = pd.to_datetime(ds)
def test_from_element(self): t_env = self.t_env field_names = [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q" ] field_types = [ DataTypes.BIGINT(), DataTypes.DOUBLE(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3), DataTypes.INTERVAL(DataTypes.SECOND(3)), DataTypes.ARRAY(DataTypes.DOUBLE()), DataTypes.ARRAY(DataTypes.DOUBLE(False)), DataTypes.ARRAY(DataTypes.STRING()), DataTypes.ARRAY(DataTypes.DATE()), DataTypes.DECIMAL(38, 18), DataTypes.ROW([ DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.DOUBLE()) ]), DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()), DataTypes.BYTES(), PythonOnlyUDT() ] schema = DataTypes.ROW( list( map( lambda field_name, field_type: DataTypes.FIELD( field_name, field_type), field_names, field_types))) sink_table_ddl = """ CREATE TABLE Results( a BIGINT, b DOUBLE, c STRING, d STRING, e DATE, f TIME, g TIMESTAMP(3), h INT, i ARRAY<DOUBLE>, j ARRAY<DOUBLE NOT NULL>, k ARRAY<STRING>, l ARRAY<DATE>, m DECIMAL(38, 18), n ROW<a BIGINT, b DOUBLE>, o MAP<STRING, DOUBLE>, p BYTES, q ARRAY<DOUBLE NOT NULL>) WITH ('connector'='test-sink') """ self.t_env.execute_sql(sink_table_ddl) t = t_env.from_elements( [(1, 1.0, "hi", "hello", datetime.date(1970, 1, 2), datetime.time(1, 0, 0), datetime.datetime(1970, 1, 2, 0, 0), datetime.timedelta(days=1, microseconds=10), [1.0, None], array.array("d", [1.0, 2.0]), ["abc"], [datetime.date(1970, 1, 2)], Decimal(1), Row("a", "b")(1, 2.0), { "key": 1.0 }, bytearray(b'ABCD'), PythonOnlyPoint(3.0, 4.0))], schema) t.execute_insert("Results").wait() actual = source_sink_utils.results() expected = [ '+I[1, 1.0, hi, hello, 1970-01-02, 01:00, 1970-01-02T00:00, ' '86400, [1.0, null], [1.0, 2.0], [abc], [1970-01-02], ' '1.000000000000000000, +I[1, 2.0], {key=1.0}, [65, 66, 67, 68], [3.0, 4.0]]' ] self.assert_equals(actual, expected)
def test_all_data_types(self): def boolean_func(bool_param): assert isinstance(bool_param, bool), 'bool_param of wrong type %s !' \ % type(bool_param) return bool_param def tinyint_func(tinyint_param): assert isinstance(tinyint_param, int), 'tinyint_param of wrong type %s !' \ % type(tinyint_param) return tinyint_param def smallint_func(smallint_param): assert isinstance(smallint_param, int), 'smallint_param of wrong type %s !' \ % type(smallint_param) assert smallint_param == 32767, 'smallint_param of wrong value %s' % smallint_param return smallint_param def int_func(int_param): assert isinstance(int_param, int), 'int_param of wrong type %s !' \ % type(int_param) assert int_param == -2147483648, 'int_param of wrong value %s' % int_param return int_param def bigint_func(bigint_param): assert isinstance(bigint_param, int), 'bigint_param of wrong type %s !' \ % type(bigint_param) return bigint_param def bigint_func_none(bigint_param): assert bigint_param is None, 'bigint_param %s should be None!' % bigint_param return bigint_param def float_func(float_param): assert isinstance(float_param, float) and float_equal(float_param, 1.23, 1e-6), \ 'float_param is wrong value %s !' % float_param return float_param def double_func(double_param): assert isinstance(double_param, float) and float_equal(double_param, 1.98932, 1e-7), \ 'double_param is wrong value %s !' % double_param return double_param def bytes_func(bytes_param): assert bytes_param == b'flink', \ 'bytes_param is wrong value %s !' % bytes_param return bytes_param def str_func(str_param): assert str_param == 'pyflink', \ 'str_param is wrong value %s !' % str_param return str_param def date_func(date_param): from datetime import date assert date_param == date(year=2014, month=9, day=13), \ 'date_param is wrong value %s !' % date_param return date_param def time_func(time_param): from datetime import time assert time_param == time(hour=12, minute=0, second=0, microsecond=123000), \ 'time_param is wrong value %s !' % time_param return time_param def timestamp_func(timestamp_param): from datetime import datetime assert timestamp_param == datetime(2018, 3, 11, 3, 0, 0, 123000), \ 'timestamp_param is wrong value %s !' % timestamp_param return timestamp_param def array_func(array_param): assert array_param == [[1, 2, 3]] or array_param == ((1, 2, 3),), \ 'array_param is wrong value %s !' % array_param return array_param[0] def map_func(map_param): assert map_param == {1: 'flink', 2: 'pyflink'}, \ 'map_param is wrong value %s !' % map_param return map_param def decimal_func(decimal_param): from decimal import Decimal assert decimal_param == Decimal('1000000000000000000.050000000000000000'), \ 'decimal_param is wrong value %s !' % decimal_param return decimal_param def decimal_cut_func(decimal_param): from decimal import Decimal assert decimal_param == Decimal('1000000000000000000.059999999999999999'), \ 'decimal_param is wrong value %s !' % decimal_param return decimal_param self.t_env.create_temporary_system_function( "boolean_func", udf(boolean_func, result_type=DataTypes.BOOLEAN())) self.t_env.create_temporary_system_function( "tinyint_func", udf(tinyint_func, result_type=DataTypes.TINYINT())) self.t_env.create_temporary_system_function( "smallint_func", udf(smallint_func, result_type=DataTypes.SMALLINT())) self.t_env.create_temporary_system_function( "int_func", udf(int_func, result_type=DataTypes.INT())) self.t_env.create_temporary_system_function( "bigint_func", udf(bigint_func, result_type=DataTypes.BIGINT())) self.t_env.create_temporary_system_function( "bigint_func_none", udf(bigint_func_none, result_type=DataTypes.BIGINT())) self.t_env.create_temporary_system_function( "float_func", udf(float_func, result_type=DataTypes.FLOAT())) self.t_env.create_temporary_system_function( "double_func", udf(double_func, result_type=DataTypes.DOUBLE())) self.t_env.create_temporary_system_function( "bytes_func", udf(bytes_func, result_type=DataTypes.BYTES())) self.t_env.create_temporary_system_function( "str_func", udf(str_func, result_type=DataTypes.STRING())) self.t_env.create_temporary_system_function( "date_func", udf(date_func, result_type=DataTypes.DATE())) self.t_env.create_temporary_system_function( "time_func", udf(time_func, result_type=DataTypes.TIME())) self.t_env.create_temporary_system_function( "timestamp_func", udf(timestamp_func, result_type=DataTypes.TIMESTAMP(3))) self.t_env.create_temporary_system_function( "array_func", udf(array_func, result_type=DataTypes.ARRAY(DataTypes.BIGINT()))) self.t_env.create_temporary_system_function( "map_func", udf(map_func, result_type=DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING()))) self.t_env.register_function( "decimal_func", udf(decimal_func, result_type=DataTypes.DECIMAL(38, 18))) self.t_env.register_function( "decimal_cut_func", udf(decimal_cut_func, result_type=DataTypes.DECIMAL(38, 18))) sink_table_ddl = """ CREATE TABLE Results( a BIGINT, b BIGINT, c TINYINT, d BOOLEAN, e SMALLINT, f INT, g FLOAT, h DOUBLE, i BYTES, j STRING, k DATE, l TIME, m TIMESTAMP(3), n ARRAY<BIGINT>, o MAP<BIGINT, STRING>, p DECIMAL(38, 18), q DECIMAL(38, 18)) WITH ('connector'='test-sink') """ self.t_env.execute_sql(sink_table_ddl) import datetime import decimal t = self.t_env.from_elements( [(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932, bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13), datetime.time(hour=12, minute=0, second=0, microsecond=123000), datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [[1, 2, 3]], { 1: 'flink', 2: 'pyflink' }, decimal.Decimal('1000000000000000000.05'), decimal.Decimal( '1000000000000000000.05999999999999999899999999999'))], DataTypes.ROW([ DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.BIGINT()), DataTypes.FIELD("c", DataTypes.TINYINT()), DataTypes.FIELD("d", DataTypes.BOOLEAN()), DataTypes.FIELD("e", DataTypes.SMALLINT()), DataTypes.FIELD("f", DataTypes.INT()), DataTypes.FIELD("g", DataTypes.FLOAT()), DataTypes.FIELD("h", DataTypes.DOUBLE()), DataTypes.FIELD("i", DataTypes.BYTES()), DataTypes.FIELD("j", DataTypes.STRING()), DataTypes.FIELD("k", DataTypes.DATE()), DataTypes.FIELD("l", DataTypes.TIME()), DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)), DataTypes.FIELD( "n", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.BIGINT()))), DataTypes.FIELD( "o", DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING())), DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)), DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18)) ])) t.select(call("bigint_func", t.a), call("bigint_func_none", t.b), call("tinyint_func", t.c), call("boolean_func", t.d), call("smallint_func", t.e), call("int_func", t.f), call("float_func", t.g), call("double_func", t.h), call("bytes_func", t.i), call("str_func", t.j), call("date_func", t.k), call("time_func", t.l), call("timestamp_func", t.m), call("array_func", t.n), call("map_func", t.o), call("decimal_func", t.p), call("decimal_cut_func", t.q)) \ .execute_insert("Results").wait() actual = source_sink_utils.results() # Currently the sink result precision of DataTypes.TIME(precision) only supports 0. self.assert_equals(actual, [ "+I[1, null, 1, true, 32767, -2147483648, 1.23, 1.98932, " "[102, 108, 105, 110, 107], pyflink, 2014-09-13, " "12:00:00.123, 2018-03-11T03:00:00.123, [1, 2, 3], " "{1=flink, 2=pyflink}, 1000000000000000000.050000000000000000, " "1000000000000000000.059999999999999999]" ])
if (datetime.now() - self.last_dump_time).seconds >= self.interval_dump_seconds: r = redis.StrictRedis(**self.redis_params) try: r.set(self.model_name, pickle.dumps(self.clf, protocol=pickle.HIGHEST_PROTOCOL)) except (redis.exceptions.RedisError, TypeError, Exception): logging.warning('无法连接 Redis 以存储模型数据') self.last_dump_time = datetime.now() # 无论是否更新成功,都更新保存时间 model = udf( Model(), input_types=[DataTypes.ARRAY(DataTypes.INT()), DataTypes.TINYINT()], result_type=DataTypes.TINYINT()) t_env.register_function('train_and_predict', model) # ########################### 创建源表(source) ########################### # 使用 Kafka-SQL 连接器从 Kafka 实时消费数据。 t_env.execute_sql(f""" CREATE TABLE source ( x ARRAY<INT>, -- 图片灰度数据 actual_y TINYINT, -- 实际数字 ts TIMESTAMP(3) -- 图片产生时间 ) with ( 'connector' = 'kafka', 'topic' = '{source_topic}',