def test_path(self):
    file_system = FileSystem()

    file_system = file_system.path("/test.csv")

    properties = file_system.to_properties()
    expected = {'connector.property-version': '1',
                'connector.type': 'filesystem',
                'connector.path': '/test.csv'}
    assert properties == expected
def test_end_to_end(self):
    source_path = os.path.join(self.tempdir, 'streaming.csv')
    with open(source_path, 'w') as f:
        lines = 'a,b,c\n' + \
                '1,hi,hello\n' + \
                '#comments\n' + \
                "error line\n" + \
                '2,"hi,world!",hello\n'
        f.write(lines)
        f.close()
    sink_path = os.path.join(self.tempdir, 'streaming2.csv')
    t_env = self.t_env

    # connect source
    t_env.connect(FileSystem().path(source_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .line_delimiter("\n")
                     .ignore_parse_errors()
                     .quote_character('"')
                     .comment_prefix("#")
                     .ignore_first_line()
                     .field("a", "INT")
                     .field("b", "VARCHAR")
                     .field("c", "VARCHAR"))\
        .with_schema(Schema()
                     .field("a", "INT")
                     .field("b", "VARCHAR")
                     .field("c", "VARCHAR"))\
        .in_append_mode()\
        .register_table_source("source")

    # connect sink
    t_env.connect(FileSystem().path(sink_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .with_schema(Schema()
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .register_table_sink("sink")

    t_env.scan("source") \
        .select("a + 1, b, c") \
        .insert_into("sink")
    t_env.execute()

    # the header line, the "#comments" line and the unparsable "error line"
    # are skipped by the source format; quotes are stripped from the quoted field
    with open(sink_path, 'r') as f:
        lines = f.read()
        assert lines == '2,hi,hello\n' + '3,hi,world!,hello\n'
def test_register_table_source(self):
    source_path = os.path.join(self.tempdir, 'streaming.csv')
    field_names = ["a", "b", "c"]
    field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
    data = [(1, "Hi", "Hello"), (2, "Hello", "Hello")]
    self.prepare_csv_source(source_path, data, field_types, field_names)
    t_env = self.t_env

    sink_path = os.path.join(self.tempdir, 'streaming2.csv')
    if os.path.isfile(sink_path):
        os.remove(sink_path)
    t_env.register_table_sink(
        "sink",
        field_names, field_types, CsvTableSink(sink_path))

    # connect source
    t_env.connect(FileSystem().path(source_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .with_schema(Schema()
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .register_table_source("source")

    t_env.scan("source") \
        .select("a + 1, b, c") \
        .insert_into("sink")
    t_env.execute()

    with open(sink_path, 'r') as f:
        lines = f.read()
        assert lines == '2,Hi,Hello\n' + '3,Hello,Hello\n'
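# NOTE: prepare_csv_source is a helper defined elsewhere in this test class; its body
# is not shown in this section. A minimal sketch of what such a helper might look like
# (an assumption: it simply writes each row as a comma-joined line, ignoring field_types)
# is given here for reference only:
#
#     def prepare_csv_source(self, path, data, field_types, field_names):
#         with open(path, 'w') as f:
#             for row in data:
#                 f.write(','.join(str(col) for col in row) + '\n')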
def test_with_schema(self):
    descriptor = self.t_env.connect(FileSystem())

    descriptor = descriptor.with_format(OldCsv()).with_schema(Schema().field("a", "INT"))

    properties = descriptor.to_properties()
    expected = {'schema.0.name': 'a',
                'schema.0.type': 'INT',
                'format.type': 'csv',
                'format.property-version': '1',
                'connector.type': 'filesystem',
                'connector.property-version': '1'}
    assert properties == expected
def test_in_upsert_mode(self):
    descriptor = self.t_env.connect(FileSystem())

    descriptor = descriptor \
        .with_format(OldCsv()) \
        .in_upsert_mode()

    properties = descriptor.to_properties()
    expected = {'update-mode': 'upsert',
                'format.type': 'csv',
                'format.property-version': '1',
                'connector.property-version': '1',
                'connector.type': 'filesystem'}
    assert properties == expected