def test_rowtime(self):
    """rowtime() attached to a field should emit the rowtime timestamp-extractor
    and watermark properties under that field's schema index (schema.2 here).

    Fix: removed a leftover debug ``print(properties)`` — tests should not
    write to stdout.
    """
    schema = Schema()
    schema = schema\
        .field("int_field", DataTypes.INT())\
        .field("long_field", DataTypes.BIGINT())\
        .field("rtime", DataTypes.BIGINT())\
        .rowtime(Rowtime()
                 .timestamps_from_field("long_field")
                 .watermarks_periodic_bounded(5000))\
        .field("string_field", DataTypes.STRING())

    properties = schema.to_properties()
    expected = {
        'schema.0.name': 'int_field',
        'schema.0.type': 'INT',
        'schema.1.name': 'long_field',
        'schema.1.type': 'BIGINT',
        'schema.2.name': 'rtime',
        'schema.2.type': 'BIGINT',
        # timestamps come from the "long_field" column
        'schema.2.rowtime.timestamps.type': 'from-field',
        'schema.2.rowtime.timestamps.from': 'long_field',
        # periodic bounded watermarks with a 5000 ms delay
        'schema.2.rowtime.watermarks.type': 'periodic-bounded',
        'schema.2.rowtime.watermarks.delay': '5000',
        'schema.3.name': 'string_field',
        'schema.3.type': 'VARCHAR',
    }
    assert properties == expected
# NOTE(review): unlike the sibling tests this function takes no `self` —
# confirm it is intended as a module-level test rather than a method.
def test_schema():
    """Copying a TableSchema into a Schema descriptor yields one name/type
    property pair per column."""
    table_schema = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    props = Schema().schema(table_schema).to_properties()
    expected = {
        'schema.0.name': 'a', 'schema.0.type': 'INT',
        'schema.1.name': 'b', 'schema.1.type': 'VARCHAR',
    }
    assert props == expected
def test_end_to_end(self):
    """End-to-end descriptor test: read a CSV file (with a header line, a
    comment line, quoting, and one malformed row) through the connect() API,
    apply a simple projection, and write the result with a CSV sink.

    Fix: removed the redundant ``f.close()`` inside the ``with open(...)``
    block — the context manager already closes the file.
    """
    source_path = os.path.join(self.tempdir, 'streaming.csv')
    with open(source_path, 'w') as f:
        # "error line" is skipped via ignore_parse_errors(); "#comments" via
        # comment_prefix("#"); "a,b,c" via ignore_first_line().
        f.write('a,b,c\n'
                '1,hi,hello\n'
                '#comments\n'
                'error line\n'
                '2,"hi,world!",hello\n')

    sink_path = os.path.join(self.tempdir, 'streaming2.csv')
    t_env = self.t_env

    # connect source — note the source declares field types as strings
    t_env.connect(FileSystem().path(source_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .line_delimiter("\n")
                     .ignore_parse_errors()
                     .quote_character('"')
                     .comment_prefix("#")
                     .ignore_first_line()
                     .field("a", "INT")
                     .field("b", "VARCHAR")
                     .field("c", "VARCHAR"))\
        .with_schema(Schema()
                     .field("a", "INT")
                     .field("b", "VARCHAR")
                     .field("c", "VARCHAR"))\
        .in_append_mode()\
        .register_table_source("source")

    # connect sink — the sink declares field types via DataTypes
    t_env.connect(FileSystem().path(sink_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .with_schema(Schema()
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .register_table_sink("sink")

    t_env.scan("source") \
        .select("a + 1, b, c") \
        .insert_into("sink")
    t_env.execute()

    with open(sink_path, 'r') as f:
        lines = f.read()
    # the quoted "hi,world!" loses its quotes on the way back out
    assert lines == '2,hi,hello\n' + '3,hi,world!,hello\n'
def test_schema(self):
    """Schema.schema(TableSchema) should copy every column into the
    descriptor's properties as schema.N.name / schema.N.type pairs."""
    source_schema = TableSchema(
        ["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    props = Schema().schema(source_schema).to_properties()
    self.assertEqual(
        {
            'schema.0.name': 'a',
            'schema.0.type': 'INT',
            'schema.1.name': 'b',
            'schema.1.type': 'VARCHAR'
        },
        props)
def test_register_table_source(self):
    """A source registered via connect()/with_format()/with_schema() should be
    scannable, and its rows should flow through a projection into the sink."""
    source_path = os.path.join(self.tempdir + '/streaming.csv')
    names = ["a", "b", "c"]
    types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
    rows = [(1, "Hi", "Hello"), (2, "Hello", "Hello")]
    self.prepare_csv_source(source_path, rows, types, names)

    t_env = self.t_env
    sink_path = os.path.join(self.tempdir + '/streaming2.csv')
    if os.path.isfile(sink_path):
        os.remove(sink_path)
    t_env.register_table_sink("sink", names, types, CsvTableSink(sink_path))

    # build format and schema descriptors field-by-field from the same lists
    source_format = OldCsv().field_delimiter(',')
    source_schema = Schema()
    for field_name, field_type in zip(names, types):
        source_format = source_format.field(field_name, field_type)
        source_schema = source_schema.field(field_name, field_type)

    # connect source
    t_env.connect(FileSystem().path(source_path)) \
        .with_format(source_format) \
        .with_schema(source_schema) \
        .register_table_source("source")

    t_env.scan("source") \
        .select("a + 1, b, c") \
        .insert_into("sink")
    t_env.execute()

    with open(sink_path, 'r') as f:
        lines = f.read()
    assert lines == '2,Hi,Hello\n' + '3,Hello,Hello\n'
def test_proctime(self):
    """proctime() should flag the most recently added field with
    schema.N.proctime = 'true'."""
    descriptor = (Schema()
                  .field("int_field", DataTypes.INT())
                  .field("ptime", DataTypes.BIGINT())
                  .proctime()
                  .field("string_field", DataTypes.STRING()))
    expected = {
        'schema.0.name': 'int_field',
        'schema.0.type': 'INT',
        'schema.1.name': 'ptime',
        'schema.1.type': 'BIGINT',
        # proctime() applies to "ptime", the field declared just before it
        'schema.1.proctime': 'true',
        'schema.2.name': 'string_field',
        'schema.2.type': 'VARCHAR',
    }
    assert descriptor.to_properties() == expected
def test_from_origin_field(self):
    """from_origin_field() should record schema.N.from for the most recently
    declared field, mapping it to the named origin column."""
    descriptor = (Schema()
                  .field("int_field", DataTypes.INT())
                  .field("long_field", DataTypes.BIGINT())
                  .from_origin_field("origin_field_a")
                  .field("string_field", DataTypes.STRING()))
    expected = {
        'schema.0.name': 'int_field',
        'schema.0.type': 'INT',
        'schema.1.name': 'long_field',
        'schema.1.type': 'BIGINT',
        # "long_field" is sourced from the input column "origin_field_a"
        'schema.1.from': 'origin_field_a',
        'schema.2.name': 'string_field',
        'schema.2.type': 'VARCHAR',
    }
    assert descriptor.to_properties() == expected
def test_field_in_string(self):
    """Fields declared with string type names should round-trip verbatim into
    the schema.N.type properties."""
    declared = [
        ("int_field", 'INT'),
        ("long_field", 'BIGINT'),
        ("string_field", 'VARCHAR'),
        ("timestamp_field", 'SQL_TIMESTAMP'),
        ("time_field", 'SQL_TIME'),
        ("date_field", 'SQL_DATE'),
        ("double_field", 'DOUBLE'),
        ("float_field", 'FLOAT'),
        ("byte_field", 'TINYINT'),
        ("short_field", 'SMALLINT'),
        ("boolean_field", 'BOOLEAN'),
    ]
    descriptor = Schema()
    for field_name, type_string in declared:
        descriptor = descriptor.field(field_name, type_string)

    # expected properties follow directly from the declaration order
    expected = {}
    for index, (field_name, type_string) in enumerate(declared):
        expected['schema.%d.name' % index] = field_name
        expected['schema.%d.type' % index] = type_string

    assert descriptor.to_properties() == expected
def test_field(self):
    """Each DataTypes constant should map to its expected type string in the
    generated properties (e.g. LONG -> BIGINT, BYTE -> TINYINT).

    NOTE(review): this revision passes the DataTypes attributes uncalled
    (``DataTypes.INT`` not ``DataTypes.INT()``) — preserved as-is; confirm
    against the DataTypes API version this file targets.
    """
    fields = [
        # (field name, declared type, expected property type string)
        ("int_field", DataTypes.INT, 'INT'),
        ("long_field", DataTypes.LONG, 'BIGINT'),
        ("string_field", DataTypes.STRING, 'VARCHAR'),
        ("timestamp_field", DataTypes.TIMESTAMP, 'TIMESTAMP'),
        ("time_field", DataTypes.TIME, 'TIME'),
        ("date_field", DataTypes.DATE, 'DATE'),
        ("double_field", DataTypes.DOUBLE, 'DOUBLE'),
        ("float_field", DataTypes.FLOAT, 'FLOAT'),
        ("byte_field", DataTypes.BYTE, 'TINYINT'),
        ("short_field", DataTypes.SHORT, 'SMALLINT'),
        ("boolean_field", DataTypes.BOOLEAN, 'BOOLEAN'),
    ]
    descriptor = Schema()
    for field_name, data_type, _ in fields:
        descriptor = descriptor.field(field_name, data_type)

    expected = {}
    for index, (field_name, _, type_string) in enumerate(fields):
        expected['schema.%d.name' % index] = field_name
        expected['schema.%d.type' % index] = type_string

    assert descriptor.to_properties() == expected
def test_field_in_string(self):
    """String-typed field declarations should appear verbatim as the
    schema.N.type values in the descriptor's properties."""
    names_and_types = (
        ('int_field', 'INT'),
        ('long_field', 'BIGINT'),
        ('string_field', 'VARCHAR'),
        ('timestamp_field', 'SQL_TIMESTAMP'),
        ('time_field', 'SQL_TIME'),
        ('date_field', 'SQL_DATE'),
        ('double_field', 'DOUBLE'),
        ('float_field', 'FLOAT'),
        ('byte_field', 'TINYINT'),
        ('short_field', 'SMALLINT'),
        ('boolean_field', 'BOOLEAN'),
    )
    schema = Schema()
    for name, type_name in names_and_types:
        schema = schema.field(name, type_name)

    # build the expected mapping from the same declaration list
    expected = {}
    for i, (name, type_name) in enumerate(names_and_types):
        expected.update({
            'schema.{}.name'.format(i): name,
            'schema.{}.type'.format(i): type_name,
        })

    assert schema.to_properties() == expected
def test_field(self):
    """Each DataTypes constructor should map to its expected property type
    string (BIGINT, VARCHAR, TINYINT, ...) in declaration order."""
    fields = [
        # (field name, declared DataTypes value, expected type string)
        ("int_field", DataTypes.INT(), 'INT'),
        ("long_field", DataTypes.BIGINT(), 'BIGINT'),
        ("string_field", DataTypes.STRING(), 'VARCHAR'),
        ("timestamp_field", DataTypes.TIMESTAMP(), 'TIMESTAMP'),
        ("time_field", DataTypes.TIME(), 'TIME'),
        ("date_field", DataTypes.DATE(), 'DATE'),
        ("double_field", DataTypes.DOUBLE(), 'DOUBLE'),
        ("float_field", DataTypes.FLOAT(), 'FLOAT'),
        ("byte_field", DataTypes.TINYINT(), 'TINYINT'),
        ("short_field", DataTypes.SMALLINT(), 'SMALLINT'),
        ("boolean_field", DataTypes.BOOLEAN(), 'BOOLEAN'),
    ]
    descriptor = Schema()
    for field_name, data_type, _ in fields:
        descriptor = descriptor.field(field_name, data_type)

    expected = {}
    for index, (field_name, _, type_string) in enumerate(fields):
        expected['schema.%d.name' % index] = field_name
        expected['schema.%d.type' % index] = type_string

    assert descriptor.to_properties() == expected
def test_with_schema(self):
    """with_schema() should merge schema properties with the connector and
    format properties already accumulated on the descriptor."""
    csv_schema = Schema().field("a", "INT")
    descriptor = self.t_env.connect(FileSystem()) \
        .with_format(OldCsv()) \
        .with_schema(csv_schema)
    expected = {
        'connector.type': 'filesystem',
        'connector.property-version': '1',
        'format.type': 'csv',
        'format.property-version': '1',
        'schema.0.name': 'a',
        'schema.0.type': 'INT',
    }
    assert descriptor.to_properties() == expected