def get_accumulator_type(self): return DataTypes.ROW([ DataTypes.FIELD( "f0", DataTypes.MAP_VIEW(DataTypes.STRING(), DataTypes.BIGINT())), DataTypes.FIELD("f1", DataTypes.BIGINT()) ])
def sql_type(cls):
    return DataTypes.ROW([
        DataTypes.FIELD("type", DataTypes.TINYINT()),
        DataTypes.FIELD("size", DataTypes.INT()),
        DataTypes.FIELD("indices", DataTypes.ARRAY(DataTypes.INT())),
        DataTypes.FIELD("values", DataTypes.ARRAY(DataTypes.DOUBLE())),
    ])
def get_result_type(self):
    return DataTypes.ROW([
        DataTypes.FIELD("f0", DataTypes.STRING()),
        DataTypes.FIELD("f1", DataTypes.STRING()),
        DataTypes.FIELD("f2", DataTypes.STRING()),
        DataTypes.FIELD("f3", DataTypes.BIGINT())
    ])
def test_from_element(self): t_env = self.t_env field_names = [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s" ] field_types = [ DataTypes.BIGINT(), DataTypes.DOUBLE(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(), DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(), DataTypes.INTERVAL(DataTypes.DAY(), DataTypes.SECOND()), DataTypes.ARRAY(DataTypes.DOUBLE()), DataTypes.ARRAY(DataTypes.DOUBLE(False)), DataTypes.ARRAY(DataTypes.STRING()), DataTypes.ARRAY(DataTypes.DATE()), DataTypes.DECIMAL(10, 0), DataTypes.ROW([ DataTypes.FIELD("a", DataTypes.BIGINT()), DataTypes.FIELD("b", DataTypes.DOUBLE()) ]), DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()), DataTypes.BYTES(), ExamplePointUDT(), PythonOnlyUDT() ] schema = DataTypes.ROW( list( map( lambda field_name, field_type: DataTypes.FIELD( field_name, field_type), field_names, field_types))) table_sink = source_sink_utils.TestAppendSink(field_names, field_types) t_env.register_table_sink("Results", table_sink) t = t_env.from_elements( [(1, 1.0, "hi", "hello", datetime.date(1970, 1, 2), datetime.time(1, 0, 0), datetime.datetime( 1970, 1, 2, 0, 0), datetime.datetime(1970, 1, 2, 0, 0), datetime.timedelta(days=1, microseconds=10), [1.0, None], array.array("d", [1.0, 2.0]), ["abc"], [datetime.date(1970, 1, 2)], Decimal(1), Row("a", "b")(1, 2.0), { "key": 1.0 }, bytearray(b'ABCD'), ExamplePoint( 1.0, 2.0), PythonOnlyPoint(3.0, 4.0))], schema) t.insert_into("Results") self.env.execute() actual = source_sink_utils.results() expected = [ '1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,' '1970-01-02 00:00:00.0,86400000010,[1.0, null],[1.0, 2.0],[abc],[1970-01-02],' '1,1,2.0,{key=1.0},[65, 66, 67, 68],[1.0, 2.0],[3.0, 4.0]' ] self.assert_equals(actual, expected)
def test_row_type_as_input_types_and_result_types(self):
    # test that input_types and result_types may be given as DataTypes.ROW
    a = udtf(lambda i: i,
             input_types=DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT())]),
             result_types=DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT())]))
    self.assertEqual(a._input_types,
                     [DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT())])])
    self.assertEqual(a._result_types,
                     [DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT())])])
def test_blink_from_element(self):
    t_env = BatchTableEnvironment.create(
        environment_settings=EnvironmentSettings.new_instance()
        .use_blink_planner().in_batch_mode().build())
    field_names = [
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q"
    ]
    field_types = [
        DataTypes.BIGINT(), DataTypes.DOUBLE(), DataTypes.STRING(),
        DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(),
        DataTypes.TIMESTAMP(3),
        DataTypes.INTERVAL(DataTypes.SECOND(3)),
        DataTypes.ARRAY(DataTypes.DOUBLE()),
        DataTypes.ARRAY(DataTypes.DOUBLE(False)),
        DataTypes.ARRAY(DataTypes.STRING()),
        DataTypes.ARRAY(DataTypes.DATE()),
        DataTypes.DECIMAL(38, 18),
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.BIGINT()),
            DataTypes.FIELD("b", DataTypes.DOUBLE())
        ]),
        DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()),
        DataTypes.BYTES(), PythonOnlyUDT()
    ]
    schema = DataTypes.ROW(
        list(
            map(
                lambda field_name, field_type: DataTypes.FIELD(
                    field_name, field_type), field_names, field_types)))
    table_sink = source_sink_utils.TestAppendSink(field_names, field_types)
    t_env.register_table_sink("Results", table_sink)
    t = t_env.from_elements(
        [(1, 1.0, "hi", "hello", datetime.date(1970, 1, 2),
          datetime.time(1, 0, 0), datetime.datetime(1970, 1, 2, 0, 0),
          datetime.timedelta(days=1, microseconds=10), [1.0, None],
          array.array("d", [1.0, 2.0]), ["abc"],
          [datetime.date(1970, 1, 2)], Decimal(1), Row("a", "b")(1, 2.0),
          {"key": 1.0}, bytearray(b'ABCD'), PythonOnlyPoint(3.0, 4.0))],
        schema)
    t.insert_into("Results")
    t_env.execute("test")
    actual = source_sink_utils.results()
    expected = [
        '1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,'
        '86400000,[1.0, null],[1.0, 2.0],[abc],[1970-01-02],'
        '1.000000000000000000,1,2.0,{key=1.0},[65, 66, 67, 68],[3.0, 4.0]'
    ]
    self.assert_equals(actual, expected)
def test_from_element(self):
    t_env = self.t_env
    a = array.array('b')
    # array.fromstring() was removed in Python 3.9; frombytes() is the
    # equivalent call and produces the same byte content.
    a.frombytes(b'ABCD')
    t = t_env.from_elements([
        (1, 1.0, "hi", "hello", datetime.date(1970, 1, 2),
         datetime.time(1, 0, 0), datetime.datetime(1970, 1, 2, 0, 0),
         [1.0, None], array.array("d", [1.0, 2.0]), ["abc"],
         [datetime.date(1970, 1, 2)], Decimal(1), Row("a", "b")(1, 2.0),
         {"key": 1.0}, a, ExamplePoint(1.0, 2.0), PythonOnlyPoint(3.0, 4.0))
    ])
    field_names = [
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q"
    ]
    field_types = [
        DataTypes.BIGINT(), DataTypes.DOUBLE(), DataTypes.STRING(),
        DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(),
        DataTypes.TIMESTAMP(),
        DataTypes.ARRAY(DataTypes.DOUBLE()),
        DataTypes.ARRAY(DataTypes.DOUBLE(False)),
        DataTypes.ARRAY(DataTypes.STRING()),
        DataTypes.ARRAY(DataTypes.DATE()),
        DataTypes.DECIMAL(),
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.BIGINT()),
            DataTypes.FIELD("b", DataTypes.DOUBLE())
        ]),
        DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.DOUBLE()),
        DataTypes.VARBINARY(), ExamplePointUDT(), PythonOnlyUDT()
    ]
    t_env.register_table_sink("Results", field_names, field_types,
                              source_sink_utils.TestAppendSink())
    t.insert_into("Results")
    t_env.exec_env().execute()
    actual = source_sink_utils.results()
    expected = [
        '1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,[1.0, null],'
        '[1.0, 2.0],[abc],[1970-01-02],1,1,2.0,{key=1.0},[65, 66, 67, 68],[1.0, 2.0],'
        '[3.0, 4.0]'
    ]
    self.assert_equals(actual, expected)
def conversion_from_dataframe():
    t_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())
    t_env.get_config().set("parallelism.default", "1")

    # create a Table from a Pandas DataFrame and convert it back
    pdf = pd.DataFrame(np.random.rand(1000, 2))
    table = t_env.from_pandas(
        pdf,
        schema=DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.DOUBLE()),
            DataTypes.FIELD("b", DataTypes.DOUBLE())
        ]))
    print(table.to_pandas())
def test_data_types_only_supported_in_blink_planner(self):
    timezone = self.t_env.get_config().get_local_timezone()
    local_datetime = pytz.timezone(timezone).localize(
        datetime.datetime(1970, 1, 1, 0, 0, 0, 123000))

    @udf(result_type=DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3))
    def local_zoned_timestamp_func(local_zoned_timestamp_param):
        assert local_zoned_timestamp_param == local_datetime, \
            'local_zoned_timestamp_param is wrong value %s !' % \
            local_zoned_timestamp_param
        return local_zoned_timestamp_param

    table_sink = source_sink_utils.TestAppendSink(
        ['a'], [DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3)])
    self.t_env.register_table_sink("Results", table_sink)
    t = self.t_env.from_elements(
        [(local_datetime,)],
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3))
        ]))
    exec_insert_table(
        t.select(local_zoned_timestamp_func(local_zoned_timestamp_func(t.a))),
        "Results")
    actual = source_sink_utils.results()
    self.assert_equals(actual, ["1970-01-01T00:00:00.123Z"])
def test_data_types_only_supported_in_blink_planner(self):
    import pandas as pd

    timezone = self.t_env.get_config().get_local_timezone()
    local_datetime = pytz.timezone(timezone).localize(
        datetime.datetime(1970, 1, 2, 0, 0, 0, 123000))

    @udf(result_type=DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3),
         func_type="pandas")
    def local_zoned_timestamp_func(local_zoned_timestamp_param):
        assert isinstance(local_zoned_timestamp_param, pd.Series)
        assert isinstance(local_zoned_timestamp_param[0], datetime.datetime), \
            'local_zoned_timestamp_param of wrong type %s !' % type(
                local_zoned_timestamp_param[0])
        assert local_zoned_timestamp_param[0] == local_datetime, \
            'local_zoned_timestamp_param is wrong value %s, %s!' % \
            (local_zoned_timestamp_param[0], local_datetime)
        return local_zoned_timestamp_param

    table_sink = source_sink_utils.TestAppendSink(
        ['a'], [DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3)])
    self.t_env.register_table_sink("Results", table_sink)
    t = self.t_env.from_elements(
        [(local_datetime,)],
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3))
        ]))
    t.select(local_zoned_timestamp_func(local_zoned_timestamp_func(t.a))) \
        .execute_insert("Results").wait()
    actual = source_sink_utils.results()
    self.assert_equals(actual, ["+I[1970-01-02T00:00:00.123Z]"])
def test_data_types(self):
    import pandas as pd

    timezone = self.t_env.get_config().get_local_timezone()
    local_datetime = pytz.timezone(timezone).localize(
        datetime.datetime(1970, 1, 2, 0, 0, 0, 123000))

    @udf(result_type=DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3),
         func_type="pandas")
    def local_zoned_timestamp_func(local_zoned_timestamp_param):
        assert isinstance(local_zoned_timestamp_param, pd.Series)
        assert isinstance(local_zoned_timestamp_param[0], datetime.datetime), \
            'local_zoned_timestamp_param of wrong type %s !' % type(
                local_zoned_timestamp_param[0])
        assert local_zoned_timestamp_param[0] == local_datetime, \
            'local_zoned_timestamp_param is wrong value %s, %s!' % \
            (local_zoned_timestamp_param[0], local_datetime)
        return local_zoned_timestamp_param

    sink_table_ddl = """
        CREATE TABLE Results(a TIMESTAMP_LTZ(3)) WITH ('connector'='test-sink')
    """
    self.t_env.execute_sql(sink_table_ddl)
    t = self.t_env.from_elements(
        [(local_datetime,)],
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3))
        ]))
    t.select(local_zoned_timestamp_func(local_zoned_timestamp_func(t.a))) \
        .execute_insert("Results").wait()
    actual = source_sink_utils.results()
    self.assert_equals(actual, ["+I[1970-01-02T00:00:00.123Z]"])
def test_from_element_expression(self): t_env = self.t_env field_names = ["a", "b", "c"] field_types = [ DataTypes.BIGINT(), DataTypes.STRING(), DataTypes.FLOAT() ] schema = DataTypes.ROW( list( map( lambda field_name, field_type: DataTypes.FIELD( field_name, field_type), field_names, field_types))) sink_table_ddl = """ CREATE TABLE Results(a BIGINT, b STRING, c FLOAT) WITH ('connector'='test-sink') """ self.t_env.execute_sql(sink_table_ddl) t = t_env.from_elements( [row(1, 'abc', 2.0), row(2, 'def', 3.0)], schema) t.execute_insert("Results").wait() actual = source_sink_utils.results() expected = ['+I[1, abc, 2.0]', '+I[2, def, 3.0]'] self.assert_equals(actual, expected)
def test_collect_null_value_result(self):
    element_data = [(1, None, 'a'), (3, 4, 'b'), (5, None, 'a'), (7, 8, 'b')]
    source = self.t_env.from_elements(
        element_data,
        DataTypes.ROW([DataTypes.FIELD('a', DataTypes.INT()),
                       DataTypes.FIELD('b', DataTypes.INT()),
                       DataTypes.FIELD('c', DataTypes.STRING())]))
    table_result = source.execute()
    expected_result = [Row(1, None, 'a'), Row(3, 4, 'b'), Row(5, None, 'a'),
                       Row(7, 8, 'b')]
    with table_result.collect() as results:
        collected_result = []
        for result in results:
            collected_result.append(result)
        self.assertEqual(collected_result, expected_result)
def test_udt(self):
    self.t_env.from_elements([
        (DenseVector([1, 2, 3, 4]), 0., 1.),
        (DenseVector([2, 2, 3, 4]), 0., 2.),
        (DenseVector([3, 2, 3, 4]), 0., 3.),
        (DenseVector([4, 2, 3, 4]), 0., 4.),
        (DenseVector([5, 2, 3, 4]), 0., 5.),
        (DenseVector([11, 2, 3, 4]), 1., 1.),
        (DenseVector([12, 2, 3, 4]), 1., 2.),
        (DenseVector([13, 2, 3, 4]), 1., 3.),
        (DenseVector([14, 2, 3, 4]), 1., 4.),
        (DenseVector([15, 2, 3, 4]), 1., 5.),
    ], DataTypes.ROW([
        DataTypes.FIELD("features", VectorUDT()),
        DataTypes.FIELD("label", DataTypes.DOUBLE()),
        DataTypes.FIELD("weight", DataTypes.DOUBLE())]))
def register_rides_source(st_env): st_env \ .connect( # declare the external system to connect to Kafka() .version("universal") .topic("Rides") .start_from_earliest() .property("zookeeper.connect", "zookeeper:2181") .property("bootstrap.servers", "kafka:9092")) \ .with_format( # declare a format for this system Json() .fail_on_missing_field(True) .schema(DataTypes.ROW([ DataTypes.FIELD("rideId", DataTypes.BIGINT()), DataTypes.FIELD("isStart", DataTypes.BOOLEAN()), DataTypes.FIELD("eventTime", DataTypes.STRING()), DataTypes.FIELD("lon", DataTypes.FLOAT()), DataTypes.FIELD("lat", DataTypes.FLOAT()), DataTypes.FIELD("psgCnt", DataTypes.INT()), DataTypes.FIELD("taxiId", DataTypes.BIGINT())]))) \ .with_schema( # declare the schema of the table Schema() .field("rideId", DataTypes.BIGINT()) .field("taxiId", DataTypes.BIGINT()) .field("isStart", DataTypes.BOOLEAN()) .field("lon", DataTypes.FLOAT()) .field("lat", DataTypes.FLOAT()) .field("psgCnt", DataTypes.INT()) .field("eventTime", DataTypes.STRING())) \ .in_append_mode() \ .create_temporary_table("source")
def register_rides_source(st_env): st_env \ .connect( # declare the external system to connect to Kafka() .version("0.11") .topic("Rides") .start_from_earliest() .property("zookeeper.connect", "zookeeper:2181") .property("bootstrap.servers", "kafka:9092")) \ .with_format( # declare a format for this system Json() .fail_on_missing_field(True) .schema(DataTypes.ROW([ DataTypes.FIELD("rideId", DataTypes.BIGINT()), DataTypes.FIELD("isStart", DataTypes.BOOLEAN()), DataTypes.FIELD("eventTime", DataTypes.TIMESTAMP()), DataTypes.FIELD("lon", DataTypes.FLOAT()), DataTypes.FIELD("lat", DataTypes.FLOAT()), DataTypes.FIELD("psgCnt", DataTypes.INT()), DataTypes.FIELD("taxiId", DataTypes.BIGINT())]))) \ .with_schema( # declare the schema of the table Schema() .field("rideId", DataTypes.BIGINT()) .field("taxiId", DataTypes.BIGINT()) .field("isStart", DataTypes.BOOLEAN()) .field("lon", DataTypes.FLOAT()) .field("lat", DataTypes.FLOAT()) .field("psgCnt", DataTypes.INT()) .field("rideTime", DataTypes.TIMESTAMP()) .rowtime( Rowtime() .timestamps_from_field("eventTime") .watermarks_periodic_bounded(60000))) \ .in_append_mode() \ .register_table_source("source")
def register_rides_sink(st_env):
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("TempResults")
            .property("zookeeper.connect", "zookeeper:2181")
            .property("bootstrap.servers", "kafka:9092")) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .schema(DataTypes.ROW([
                DataTypes.FIELD("rideId", DataTypes.BIGINT()),
                DataTypes.FIELD("taxiId", DataTypes.BIGINT()),
                DataTypes.FIELD("isStart", DataTypes.BOOLEAN()),
                DataTypes.FIELD("lon", DataTypes.FLOAT()),
                DataTypes.FIELD("lat", DataTypes.FLOAT()),
                DataTypes.FIELD("psgCnt", DataTypes.INT()),
                DataTypes.FIELD("rideTime", DataTypes.TIMESTAMP())
            ]))) \
        .with_schema(  # declare the schema of the table
            Schema()
            .field("rideId", DataTypes.BIGINT())
            .field("taxiId", DataTypes.BIGINT())
            .field("isStart", DataTypes.BOOLEAN())
            .field("lon", DataTypes.FLOAT())
            .field("lat", DataTypes.FLOAT())
            .field("psgCnt", DataTypes.INT())
            .field("rideTime", DataTypes.TIMESTAMP())) \
        .in_append_mode() \
        .register_table_sink("sink")
def register_transactions_source(st_env): st_env.connect(Kafka() .version("universal") .topic("transactions-data") .start_from_latest() .property("zookeeper.connect", "host.docker.internal:2181") .property("bootstrap.servers", "host.docker.internal:19091")) \ .with_format(Json() .fail_on_missing_field(True) .schema(DataTypes.ROW([ DataTypes.FIELD("customer", DataTypes.STRING()), DataTypes.FIELD("transaction_type", DataTypes.STRING()), DataTypes.FIELD("online_payment_amount", DataTypes.DOUBLE()), DataTypes.FIELD("in_store_payment_amount", DataTypes.DOUBLE()), DataTypes.FIELD("lat", DataTypes.DOUBLE()), DataTypes.FIELD("lon", DataTypes.DOUBLE()), DataTypes.FIELD("transaction_datetime", DataTypes.TIMESTAMP())]))) \ .with_schema(Schema() .field("customer", DataTypes.STRING()) .field("transaction_type", DataTypes.STRING()) .field("online_payment_amount", DataTypes.DOUBLE()) .field("in_store_payment_amount", DataTypes.DOUBLE()) .field("lat", DataTypes.DOUBLE()) .field("lon", DataTypes.DOUBLE()) .field("rowtime", DataTypes.TIMESTAMP()) .rowtime( Rowtime() .timestamps_from_field("transaction_datetime") .watermarks_periodic_bounded(60000))) \ .in_append_mode() \ .register_table_source("source")
def setUp(self):
    super(BucketizerTest, self).setUp()
    self.input_table = self.t_env.from_elements(
        [(1, -0.5, 0.0, 1.0),
         (2, float('-inf'), 1.0, float('inf')),
         (3, float('nan'), -0.5, -0.5)],
        DataTypes.ROW([
            DataTypes.FIELD("id", DataTypes.INT()),
            DataTypes.FIELD("f1", DataTypes.DOUBLE()),
            DataTypes.FIELD("f2", DataTypes.DOUBLE()),
            DataTypes.FIELD("f3", DataTypes.DOUBLE())
        ]))
    self.splits_array = ((-0.5, 0.0, 0.5),
                         (-1.0, 0.0, 2.0),
                         (float('-inf'), 10.0, float('inf')))
    self.expected_keep_result = [
        Row(1, 0, 1, 0),
        Row(2, 2, 1, 1),
        Row(3, 2, 0, 0)
    ]
    self.expected_skip_result = [Row(1, 0, 1, 0)]
def get_accumulator_type(self): return DataTypes.ROW([ DataTypes.FIELD("available", DataTypes.BOOLEAN()), DataTypes.FIELD("timestamp", DataTypes.BIGINT()), DataTypes.FIELD("batch_num", DataTypes.BIGINT()), DataTypes.FIELD("sold_last", DataTypes.LIST_VIEW(DataTypes.FLOAT())), DataTypes.FIELD("price_last", DataTypes.LIST_VIEW(DataTypes.FLOAT())), DataTypes.FIELD("review_total_last", DataTypes.LIST_VIEW(DataTypes.INT())), DataTypes.FIELD("info", DataTypes.STRING())])
def register_ride_duration_sink(st_env):
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("TempResults")
            .property("zookeeper.connect", "zookeeper:2181")
            .property("bootstrap.servers", "kafka:9092")) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .schema(DataTypes.ROW([
                DataTypes.FIELD("rideId", DataTypes.BIGINT()),
                DataTypes.FIELD("durationMin", DataTypes.BIGINT())
            ]))) \
        .with_schema(  # declare the schema of the table
            Schema()
            .field("rideId", DataTypes.BIGINT())
            .field("durationMin", DataTypes.BIGINT())) \
        .in_append_mode() \
        .register_table_sink("TempResults")
def word_count(input_path, output_path):
    t_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())
    # write all the data to one file
    t_env.get_config().get_configuration().set_string("parallelism.default", "1")

    # define the source
    if input_path is not None:
        t_env.create_temporary_table(
            'source',
            TableDescriptor.for_connector('filesystem')
                           .schema(Schema.new_builder()
                                   .column('word', DataTypes.STRING())
                                   .build())
                           .option('path', input_path)
                           .format('csv')
                           .build())
        tab = t_env.from_path('source')
    else:
        print("Executing word_count example with default input data set.")
        print("Use --input to specify file input.")
        tab = t_env.from_elements(
            map(lambda i: (i,), word_count_data),
            DataTypes.ROW([DataTypes.FIELD('line', DataTypes.STRING())]))

    # define the sink
    if output_path is not None:
        t_env.create_temporary_table(
            'sink',
            TableDescriptor.for_connector('filesystem')
                           .schema(Schema.new_builder()
                                   .column('word', DataTypes.STRING())
                                   .column('count', DataTypes.BIGINT())
                                   .build())
                           .option('path', output_path)
                           .format(FormatDescriptor.for_format('canal-json').build())
                           .build())
    else:
        print("Printing result to stdout. Use --output to specify output path.")
        t_env.create_temporary_table(
            'sink',
            TableDescriptor.for_connector('print')
                           .schema(Schema.new_builder()
                                   .column('word', DataTypes.STRING())
                                   .column('count', DataTypes.BIGINT())
                                   .build())
                           .build())

    @udtf(result_types=[DataTypes.STRING()])
    def split(line: Row):
        for s in line[0].split():
            yield Row(s)

    # compute word count
    tab.flat_map(split).alias('word') \
       .group_by(col('word')) \
       .select(col('word'), lit(1).count) \
       .execute_insert('sink') \
       .wait()
def register_transactions_source(st_env): st_env.connect(Kafka() .version("universal") .topic("server-logs") .start_from_earliest() .property("zookeeper.connect", "localhost:2181") .property("bootstrap.servers", "localhost:9092")) \ .with_format(Json() .fail_on_missing_field(True) .schema(DataTypes.ROW([ DataTypes.FIELD("event_id", DataTypes.STRING()), DataTypes.FIELD("account_id", DataTypes.DOUBLE()), DataTypes.FIELD("event_type", DataTypes.DOUBLE()), DataTypes.FIELD("location_country", DataTypes.DOUBLE()), DataTypes.FIELD("event_timestamp", DataTypes.TIMESTAMP(precision=3))]))) \ .with_schema(Schema() .field("event_id", DataTypes.STRING()) .field("account_id", DataTypes.DOUBLE()) .field("event_type", DataTypes.STRING()) .field("location_country", DataTypes.STRING()) .field("event_timestamp", DataTypes.TIMESTAMP(precision=3))) \ .in_append_mode() \ .create_temporary_table("source")
def test_schema_basic(self):
    old_schema = Schema.new_builder() \
        .from_row_data_type(DataTypes.ROW(
            [DataTypes.FIELD("a", DataTypes.TINYINT()),
             DataTypes.FIELD("b", DataTypes.SMALLINT()),
             DataTypes.FIELD("c", DataTypes.INT())])) \
        .from_fields(["d", "e"], [DataTypes.STRING(), DataTypes.BOOLEAN()]) \
        .build()
    self.schema = Schema.new_builder() \
        .from_schema(old_schema) \
        .primary_key_named("primary_constraint", "id") \
        .column("id", DataTypes.INT().not_null()) \
        .column("counter", DataTypes.INT().not_null()) \
        .column("payload", "ROW<name STRING, age INT, flag BOOLEAN>") \
        .column_by_metadata("topic", DataTypes.STRING(), None, True) \
        .column_by_expression("ts", call_sql("orig_ts - INTERVAL '60' MINUTE")) \
        .column_by_metadata("orig_ts", DataTypes.TIMESTAMP(3), "timestamp") \
        .watermark("ts", "ts - INTERVAL '5' SECOND") \
        .column_by_expression("proctime", "PROCTIME()") \
        .build()
    self.assertEqual(
        """(
  `a` TINYINT,
  `b` SMALLINT,
  `c` INT,
  `d` STRING,
  `e` BOOLEAN,
  `id` INT NOT NULL,
  `counter` INT NOT NULL,
  `payload` [ROW<name STRING, age INT, flag BOOLEAN>],
  `topic` METADATA VIRTUAL,
  `ts` AS [orig_ts - INTERVAL '60' MINUTE],
  `orig_ts` METADATA FROM 'timestamp',
  `proctime` AS [PROCTIME()],
  WATERMARK FOR `ts` AS [ts - INTERVAL '5' SECOND],
  CONSTRAINT `primary_constraint` PRIMARY KEY (`id`) NOT ENFORCED
)""", str(self.schema))
def test_from_element_expression(self): t_env = self.t_env field_names = ["a", "b", "c"] field_types = [DataTypes.BIGINT(), DataTypes.STRING(), DataTypes.FLOAT()] schema = DataTypes.ROW( list(map(lambda field_name, field_type: DataTypes.FIELD(field_name, field_type), field_names, field_types))) table_sink = source_sink_utils.TestAppendSink(field_names, field_types) t_env.register_table_sink("Results", table_sink) t = t_env.from_elements([row(1, 'abc', 2.0), row(2, 'def', 3.0)], schema) t.execute_insert("Results").wait() actual = source_sink_utils.results() expected = ['+I[1, abc, 2.0]', '+I[2, def, 3.0]'] self.assert_equals(actual, expected)
def test_all_data_types(self):
    import pandas as pd
    import numpy as np

    @udf(result_type=DataTypes.TINYINT(), func_type="pandas")
    def tinyint_func(tinyint_param):
        assert isinstance(tinyint_param, pd.Series)
        assert isinstance(tinyint_param[0], np.int8), \
            'tinyint_param of wrong type %s !' % type(tinyint_param[0])
        return tinyint_param

    @udf(result_type=DataTypes.SMALLINT(), func_type="pandas")
    def smallint_func(smallint_param):
        assert isinstance(smallint_param, pd.Series)
        assert isinstance(smallint_param[0], np.int16), \
            'smallint_param of wrong type %s !' % type(smallint_param[0])
        assert smallint_param[0] == 32767, \
            'smallint_param of wrong value %s' % smallint_param
        return smallint_param

    @udf(result_type=DataTypes.INT(), func_type="pandas")
    def int_func(int_param):
        assert isinstance(int_param, pd.Series)
        assert isinstance(int_param[0], np.int32), \
            'int_param of wrong type %s !' % type(int_param[0])
        assert int_param[0] == -2147483648, \
            'int_param of wrong value %s' % int_param
        return int_param

    @udf(result_type=DataTypes.BIGINT(), func_type="pandas")
    def bigint_func(bigint_param):
        assert isinstance(bigint_param, pd.Series)
        assert isinstance(bigint_param[0], np.int64), \
            'bigint_param of wrong type %s !' % type(bigint_param[0])
        return bigint_param

    @udf(result_type=DataTypes.BOOLEAN(), func_type="pandas")
    def boolean_func(boolean_param):
        assert isinstance(boolean_param, pd.Series)
        assert isinstance(boolean_param[0], np.bool_), \
            'boolean_param of wrong type %s !' % type(boolean_param[0])
        return boolean_param

    @udf(result_type=DataTypes.FLOAT(), func_type="pandas")
    def float_func(float_param):
        assert isinstance(float_param, pd.Series)
        assert isinstance(float_param[0], np.float32), \
            'float_param of wrong type %s !' % type(float_param[0])
        return float_param

    @udf(result_type=DataTypes.DOUBLE(), func_type="pandas")
    def double_func(double_param):
        assert isinstance(double_param, pd.Series)
        assert isinstance(double_param[0], np.float64), \
            'double_param of wrong type %s !' % type(double_param[0])
        return double_param

    @udf(result_type=DataTypes.STRING(), func_type="pandas")
    def varchar_func(varchar_param):
        assert isinstance(varchar_param, pd.Series)
        assert isinstance(varchar_param[0], str), \
            'varchar_param of wrong type %s !' % type(varchar_param[0])
        return varchar_param

    @udf(result_type=DataTypes.BYTES(), func_type="pandas")
    def varbinary_func(varbinary_param):
        assert isinstance(varbinary_param, pd.Series)
        assert isinstance(varbinary_param[0], bytes), \
            'varbinary_param of wrong type %s !' % type(varbinary_param[0])
        return varbinary_param

    @udf(result_type=DataTypes.DECIMAL(38, 18), func_type="pandas")
    def decimal_func(decimal_param):
        assert isinstance(decimal_param, pd.Series)
        assert isinstance(decimal_param[0], decimal.Decimal), \
            'decimal_param of wrong type %s !' % type(decimal_param[0])
        return decimal_param

    @udf(result_type=DataTypes.DATE(), func_type="pandas")
    def date_func(date_param):
        assert isinstance(date_param, pd.Series)
        assert isinstance(date_param[0], datetime.date), \
            'date_param of wrong type %s !' % type(date_param[0])
        return date_param

    @udf(result_type=DataTypes.TIME(), func_type="pandas")
    def time_func(time_param):
        assert isinstance(time_param, pd.Series)
        assert isinstance(time_param[0], datetime.time), \
            'time_param of wrong type %s !' % type(time_param[0])
        return time_param

    timestamp_value = datetime.datetime(1970, 1, 2, 0, 0, 0, 123000)

    @udf(result_type=DataTypes.TIMESTAMP(3), func_type="pandas")
    def timestamp_func(timestamp_param):
        assert isinstance(timestamp_param, pd.Series)
        assert isinstance(timestamp_param[0], datetime.datetime), \
            'timestamp_param of wrong type %s !' % type(timestamp_param[0])
        assert timestamp_param[0] == timestamp_value, \
            'timestamp_param is wrong value %s, should be %s!' % \
            (timestamp_param[0], timestamp_value)
        return timestamp_param

    def array_func(array_param):
        assert isinstance(array_param, pd.Series)
        assert isinstance(array_param[0], np.ndarray), \
            'array_param of wrong type %s !' % type(array_param[0])
        return array_param

    array_str_func = udf(array_func,
                         result_type=DataTypes.ARRAY(DataTypes.STRING()),
                         func_type="pandas")

    array_timestamp_func = udf(array_func,
                               result_type=DataTypes.ARRAY(DataTypes.TIMESTAMP(3)),
                               func_type="pandas")

    array_int_func = udf(array_func,
                         result_type=DataTypes.ARRAY(DataTypes.INT()),
                         func_type="pandas")

    @udf(result_type=DataTypes.ARRAY(DataTypes.STRING()), func_type="pandas")
    def nested_array_func(nested_array_param):
        assert isinstance(nested_array_param, pd.Series)
        assert isinstance(nested_array_param[0], np.ndarray), \
            'nested_array_param of wrong type %s !' % type(nested_array_param[0])
        return pd.Series(nested_array_param[0])

    row_type = DataTypes.ROW([
        DataTypes.FIELD("f1", DataTypes.INT()),
        DataTypes.FIELD("f2", DataTypes.STRING()),
        DataTypes.FIELD("f3", DataTypes.TIMESTAMP(3)),
        DataTypes.FIELD("f4", DataTypes.ARRAY(DataTypes.INT()))
    ])

    @udf(result_type=row_type, func_type="pandas")
    def row_func(row_param):
        assert isinstance(row_param, pd.DataFrame)
        assert isinstance(row_param.f1, pd.Series)
        assert isinstance(row_param.f1[0], np.int32), \
            'row_param.f1 of wrong type %s !' % type(row_param.f1[0])
        assert isinstance(row_param.f2, pd.Series)
        assert isinstance(row_param.f2[0], str), \
            'row_param.f2 of wrong type %s !' % type(row_param.f2[0])
        assert isinstance(row_param.f3, pd.Series)
        assert isinstance(row_param.f3[0], datetime.datetime), \
            'row_param.f3 of wrong type %s !' % type(row_param.f3[0])
        assert isinstance(row_param.f4, pd.Series)
        assert isinstance(row_param.f4[0], np.ndarray), \
            'row_param.f4 of wrong type %s !' % type(row_param.f4[0])
        return row_param

    table_sink = source_sink_utils.TestAppendSink(
        ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
         'n', 'o', 'p', 'q', 'r', 's', 't', 'u'],
        [DataTypes.TINYINT(), DataTypes.SMALLINT(), DataTypes.INT(),
         DataTypes.BIGINT(), DataTypes.BOOLEAN(), DataTypes.BOOLEAN(),
         DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.STRING(),
         DataTypes.STRING(), DataTypes.BYTES(), DataTypes.DECIMAL(38, 18),
         DataTypes.DECIMAL(38, 18), DataTypes.DATE(), DataTypes.TIME(),
         DataTypes.TIMESTAMP(3), DataTypes.ARRAY(DataTypes.STRING()),
         DataTypes.ARRAY(DataTypes.TIMESTAMP(3)),
         DataTypes.ARRAY(DataTypes.INT()),
         DataTypes.ARRAY(DataTypes.STRING()), row_type])
    self.t_env.register_table_sink("Results", table_sink)
    t = self.t_env.from_elements(
        [(1, 32767, -2147483648, 1, True, False, 1.0, 1.0, 'hello', '中文',
          bytearray(b'flink'), decimal.Decimal('1000000000000000000.05'),
          decimal.Decimal('1000000000000000000.05999999999999999899999999999'),
          datetime.date(2014, 9, 13),
          datetime.time(hour=1, minute=0, second=1), timestamp_value,
          ['hello', '中文', None], [timestamp_value], [1, 2],
          [['hello', '中文', None]],
          Row(1, 'hello', timestamp_value, [1, 2]))],
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.TINYINT()),
            DataTypes.FIELD("b", DataTypes.SMALLINT()),
            DataTypes.FIELD("c", DataTypes.INT()),
            DataTypes.FIELD("d", DataTypes.BIGINT()),
            DataTypes.FIELD("e", DataTypes.BOOLEAN()),
            DataTypes.FIELD("f", DataTypes.BOOLEAN()),
            DataTypes.FIELD("g", DataTypes.FLOAT()),
            DataTypes.FIELD("h", DataTypes.DOUBLE()),
            DataTypes.FIELD("i", DataTypes.STRING()),
            DataTypes.FIELD("j", DataTypes.STRING()),
            DataTypes.FIELD("k", DataTypes.BYTES()),
            DataTypes.FIELD("l", DataTypes.DECIMAL(38, 18)),
            DataTypes.FIELD("m", DataTypes.DECIMAL(38, 18)),
            DataTypes.FIELD("n", DataTypes.DATE()),
            DataTypes.FIELD("o", DataTypes.TIME()),
            DataTypes.FIELD("p", DataTypes.TIMESTAMP(3)),
            DataTypes.FIELD("q", DataTypes.ARRAY(DataTypes.STRING())),
            DataTypes.FIELD("r", DataTypes.ARRAY(DataTypes.TIMESTAMP(3))),
            DataTypes.FIELD("s", DataTypes.ARRAY(DataTypes.INT())),
            DataTypes.FIELD("t", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.STRING()))),
            DataTypes.FIELD("u", row_type)
        ]))
    t.select(
        tinyint_func(t.a), smallint_func(t.b), int_func(t.c),
        bigint_func(t.d), boolean_func(t.e), boolean_func(t.f),
        float_func(t.g), double_func(t.h), varchar_func(t.i),
        varchar_func(t.j), varbinary_func(t.k), decimal_func(t.l),
        decimal_func(t.m), date_func(t.n), time_func(t.o),
        timestamp_func(t.p), array_str_func(t.q), array_timestamp_func(t.r),
        array_int_func(t.s), nested_array_func(t.t), row_func(t.u)) \
        .execute_insert("Results").wait()
    actual = source_sink_utils.results()
    self.assert_equals(actual, [
        "+I[1, 32767, -2147483648, 1, true, false, 1.0, 1.0, hello, 中文, "
        "[102, 108, 105, 110, 107], 1000000000000000000.050000000000000000, "
        "1000000000000000000.059999999999999999, 2014-09-13, 01:00:01, "
        "1970-01-02 00:00:00.123, [hello, 中文, null], [1970-01-02 00:00:00.123], "
        "[1, 2], [hello, 中文, null], +I[1, hello, 1970-01-02 00:00:00.123, [1, 2]]]"
    ])
def test_collect_for_all_data_types(self):
    expected_result = [
        Row(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
            bytearray(b'pyflink'), 'pyflink', datetime.date(2014, 9, 13),
            datetime.time(12, 0, 0, 123000),
            datetime.datetime(2018, 3, 11, 3, 0, 0, 123000),
            [Row(['[pyflink]']), Row(['[pyflink]']), Row(['[pyflink]'])],
            {1: Row(['[flink]']), 2: Row(['[pyflink]'])},
            decimal.Decimal('1000000000000000000.050000000000000000'),
            decimal.Decimal('1000000000000000000.059999999999999999'))
    ]
    source = self.t_env.from_elements(
        [(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
          bytearray(b'pyflink'), 'pyflink', datetime.date(2014, 9, 13),
          datetime.time(hour=12, minute=0, second=0, microsecond=123000),
          datetime.datetime(2018, 3, 11, 3, 0, 0, 123000),
          [Row(['pyflink']), Row(['pyflink']), Row(['pyflink'])],
          {1: Row(['flink']), 2: Row(['pyflink'])},
          decimal.Decimal('1000000000000000000.05'),
          decimal.Decimal('1000000000000000000.05999999999999999899999999999'))],
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.BIGINT()),
            DataTypes.FIELD("b", DataTypes.BIGINT()),
            DataTypes.FIELD("c", DataTypes.TINYINT()),
            DataTypes.FIELD("d", DataTypes.BOOLEAN()),
            DataTypes.FIELD("e", DataTypes.SMALLINT()),
            DataTypes.FIELD("f", DataTypes.INT()),
            DataTypes.FIELD("g", DataTypes.FLOAT()),
            DataTypes.FIELD("h", DataTypes.DOUBLE()),
            DataTypes.FIELD("i", DataTypes.BYTES()),
            DataTypes.FIELD("j", DataTypes.STRING()),
            DataTypes.FIELD("k", DataTypes.DATE()),
            DataTypes.FIELD("l", DataTypes.TIME()),
            DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)),
            DataTypes.FIELD("n", DataTypes.ARRAY(
                DataTypes.ROW([DataTypes.FIELD('ss2', DataTypes.STRING())]))),
            DataTypes.FIELD("o", DataTypes.MAP(
                DataTypes.BIGINT(),
                DataTypes.ROW([DataTypes.FIELD('ss', DataTypes.STRING())]))),
            DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)),
            DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18))
        ]))
    table_result = source.execute()
    with table_result.collect() as result:
        collected_result = []
        for i in result:
            collected_result.append(i)
        self.assertEqual(expected_result, collected_result)
from pyflink.common import Row  # needed for the Row accumulator below
from pyflink.table import AggregateFunction, DataTypes
from pyflink.table.udf import udaf


class WeightedAvg(AggregateFunction):

    def create_accumulator(self):
        # Row(sum, count)
        return Row(0, 0)

    def get_value(self, accumulator: Row) -> float:
        if accumulator[1] == 0:
            return 0
        else:
            return accumulator[0] / accumulator[1]

    def accumulate(self, accumulator: Row, value, weight):
        accumulator[0] += value * weight
        accumulator[1] += weight

    def retract(self, accumulator: Row, value, weight):
        accumulator[0] -= value * weight
        accumulator[1] -= weight


weighted_avg = udaf(f=WeightedAvg(),
                    result_type=DataTypes.DOUBLE(),
                    accumulator_type=DataTypes.ROW([
                        DataTypes.FIELD("f0", DataTypes.BIGINT()),
                        DataTypes.FIELD("f1", DataTypes.BIGINT())
                    ]))
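A minimal usage sketch for the `weighted_avg` UDAF above. The table environment setup and the `key`/`value`/`weight` column names are illustrative assumptions, not part of the original snippet; general Python UDAFs require a streaming TableEnvironment.

# usage sketch (assumed names and data)
from pyflink.table import EnvironmentSettings, TableEnvironment
from pyflink.table.expressions import col

t_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())
t = t_env.from_elements([(1, 2, 1), (1, 4, 3), (2, 6, 2)],
                        ['key', 'value', 'weight'])

# the udaf wrapper is callable on expressions and can be used
# directly inside a group-by aggregation
result = t.group_by(col('key')) \
          .select(col('key'), weighted_avg(col('value'), col('weight')))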
def test_all_data_types(self):
    def boolean_func(bool_param):
        assert isinstance(bool_param, bool), \
            'bool_param of wrong type %s !' % type(bool_param)
        return bool_param

    def tinyint_func(tinyint_param):
        assert isinstance(tinyint_param, int), \
            'tinyint_param of wrong type %s !' % type(tinyint_param)
        return tinyint_param

    def smallint_func(smallint_param):
        assert isinstance(smallint_param, int), \
            'smallint_param of wrong type %s !' % type(smallint_param)
        assert smallint_param == 32767, \
            'smallint_param of wrong value %s' % smallint_param
        return smallint_param

    def int_func(int_param):
        assert isinstance(int_param, int), \
            'int_param of wrong type %s !' % type(int_param)
        assert int_param == -2147483648, 'int_param of wrong value %s' % int_param
        return int_param

    def bigint_func(bigint_param):
        assert isinstance(bigint_param, int), \
            'bigint_param of wrong type %s !' % type(bigint_param)
        return bigint_param

    def bigint_func_none(bigint_param):
        assert bigint_param is None, 'bigint_param %s should be None!' % bigint_param
        return bigint_param

    def float_func(float_param):
        assert isinstance(float_param, float) and float_equal(float_param, 1.23, 1e-6), \
            'float_param is wrong value %s !' % float_param
        return float_param

    def double_func(double_param):
        assert isinstance(double_param, float) and float_equal(double_param, 1.98932, 1e-7), \
            'double_param is wrong value %s !' % double_param
        return double_param

    def bytes_func(bytes_param):
        assert bytes_param == b'flink', \
            'bytes_param is wrong value %s !' % bytes_param
        return bytes_param

    def str_func(str_param):
        assert str_param == 'pyflink', \
            'str_param is wrong value %s !' % str_param
        return str_param

    def date_func(date_param):
        from datetime import date
        assert date_param == date(year=2014, month=9, day=13), \
            'date_param is wrong value %s !' % date_param
        return date_param

    def time_func(time_param):
        from datetime import time
        assert time_param == time(hour=12, minute=0, second=0, microsecond=123000), \
            'time_param is wrong value %s !' % time_param
        return time_param

    def timestamp_func(timestamp_param):
        from datetime import datetime
        assert timestamp_param == datetime(2018, 3, 11, 3, 0, 0, 123000), \
            'timestamp_param is wrong value %s !' % timestamp_param
        return timestamp_param

    def array_func(array_param):
        assert array_param == [[1, 2, 3]], \
            'array_param is wrong value %s !' % array_param
        return array_param[0]

    def map_func(map_param):
        assert map_param == {1: 'flink', 2: 'pyflink'}, \
            'map_param is wrong value %s !' % map_param
        return map_param

    def decimal_func(decimal_param):
        from decimal import Decimal
        assert decimal_param == Decimal('1000000000000000000.050000000000000000'), \
            'decimal_param is wrong value %s !' % decimal_param
        return decimal_param

    def decimal_cut_func(decimal_param):
        from decimal import Decimal
        assert decimal_param == Decimal('1000000000000000000.059999999999999999'), \
            'decimal_param is wrong value %s !' % decimal_param
        return decimal_param

    self.t_env.create_temporary_system_function(
        "boolean_func", udf(boolean_func, result_type=DataTypes.BOOLEAN()))
    self.t_env.create_temporary_system_function(
        "tinyint_func", udf(tinyint_func, result_type=DataTypes.TINYINT()))
    self.t_env.create_temporary_system_function(
        "smallint_func", udf(smallint_func, result_type=DataTypes.SMALLINT()))
    self.t_env.create_temporary_system_function(
        "int_func", udf(int_func, result_type=DataTypes.INT()))
    self.t_env.create_temporary_system_function(
        "bigint_func", udf(bigint_func, result_type=DataTypes.BIGINT()))
    self.t_env.create_temporary_system_function(
        "bigint_func_none", udf(bigint_func_none, result_type=DataTypes.BIGINT()))
    self.t_env.create_temporary_system_function(
        "float_func", udf(float_func, result_type=DataTypes.FLOAT()))
    self.t_env.create_temporary_system_function(
        "double_func", udf(double_func, result_type=DataTypes.DOUBLE()))
    self.t_env.create_temporary_system_function(
        "bytes_func", udf(bytes_func, result_type=DataTypes.BYTES()))
    self.t_env.create_temporary_system_function(
        "str_func", udf(str_func, result_type=DataTypes.STRING()))
    self.t_env.create_temporary_system_function(
        "date_func", udf(date_func, result_type=DataTypes.DATE()))
    self.t_env.create_temporary_system_function(
        "time_func", udf(time_func, result_type=DataTypes.TIME()))
    self.t_env.create_temporary_system_function(
        "timestamp_func", udf(timestamp_func, result_type=DataTypes.TIMESTAMP(3)))
    self.t_env.create_temporary_system_function(
        "array_func", udf(array_func, result_type=DataTypes.ARRAY(DataTypes.BIGINT())))
    self.t_env.create_temporary_system_function(
        "map_func",
        udf(map_func, result_type=DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING())))
    self.t_env.register_function(
        "decimal_func", udf(decimal_func, result_type=DataTypes.DECIMAL(38, 18)))
    self.t_env.register_function(
        "decimal_cut_func", udf(decimal_cut_func, result_type=DataTypes.DECIMAL(38, 18)))

    table_sink = source_sink_utils.TestAppendSink(
        ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
         'n', 'o', 'p', 'q'],
        [DataTypes.BIGINT(), DataTypes.BIGINT(), DataTypes.TINYINT(),
         DataTypes.BOOLEAN(), DataTypes.SMALLINT(), DataTypes.INT(),
         DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.BYTES(),
         DataTypes.STRING(), DataTypes.DATE(), DataTypes.TIME(),
         DataTypes.TIMESTAMP(3), DataTypes.ARRAY(DataTypes.BIGINT()),
         DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING()),
         DataTypes.DECIMAL(38, 18), DataTypes.DECIMAL(38, 18)])
    self.t_env.register_table_sink("Results", table_sink)

    import datetime
    import decimal
    t = self.t_env.from_elements(
        [(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
          bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13),
          datetime.time(hour=12, minute=0, second=0, microsecond=123000),
          datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [[1, 2, 3]],
          {1: 'flink', 2: 'pyflink'},
          decimal.Decimal('1000000000000000000.05'),
          decimal.Decimal('1000000000000000000.05999999999999999899999999999'))],
        DataTypes.ROW([
            DataTypes.FIELD("a", DataTypes.BIGINT()),
            DataTypes.FIELD("b", DataTypes.BIGINT()),
            DataTypes.FIELD("c", DataTypes.TINYINT()),
            DataTypes.FIELD("d", DataTypes.BOOLEAN()),
            DataTypes.FIELD("e", DataTypes.SMALLINT()),
            DataTypes.FIELD("f", DataTypes.INT()),
            DataTypes.FIELD("g", DataTypes.FLOAT()),
            DataTypes.FIELD("h", DataTypes.DOUBLE()),
            DataTypes.FIELD("i", DataTypes.BYTES()),
            DataTypes.FIELD("j", DataTypes.STRING()),
            DataTypes.FIELD("k", DataTypes.DATE()),
            DataTypes.FIELD("l", DataTypes.TIME()),
            DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)),
            DataTypes.FIELD("n", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.BIGINT()))),
            DataTypes.FIELD("o", DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING())),
            DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)),
            DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18))
        ]))
    exec_insert_table(
        t.select("bigint_func(a), bigint_func_none(b),"
                 "tinyint_func(c), boolean_func(d),"
                 "smallint_func(e),int_func(f),"
                 "float_func(g),double_func(h),"
                 "bytes_func(i),str_func(j),"
                 "date_func(k),time_func(l),"
                 "timestamp_func(m),array_func(n),"
                 "map_func(o),decimal_func(p),"
                 "decimal_cut_func(q)"), "Results")
    actual = source_sink_utils.results()
    # Currently the sink result precision of DataTypes.TIME(precision) only supports 0.
    self.assert_equals(actual, [
        "1,null,1,true,32767,-2147483648,1.23,1.98932,"
        "[102, 108, 105, 110, 107],pyflink,2014-09-13,"
        "12:00:00,2018-03-11 03:00:00.123,[1, 2, 3],"
        "{1=flink, 2=pyflink},1000000000000000000.050000000000000000,"
        "1000000000000000000.059999999999999999"
    ])
def get_accumulator_type(self): return DataTypes.ROW([DataTypes.FIELD("f0", DataTypes.LIST_VIEW(DataTypes.STRING()))])