def test_comment_prefix(self):
    """comment_prefix('#') should surface as the 'format.comment-prefix' property."""
    descriptor = OldCsv().comment_prefix("#")
    actual = descriptor.to_properties()
    assert actual == {
        'format.comment-prefix': '#',
        'format.type': 'csv',
        'format.property-version': '1',
    }
def test_ignore_first_line(self):
    """ignore_first_line() should set 'format.ignore-first-line' to 'true'."""
    descriptor = OldCsv().ignore_first_line()
    actual = descriptor.to_properties()
    assert actual == {
        'format.ignore-first-line': 'true',
        'format.type': 'csv',
        'format.property-version': '1',
    }
def test_ignore_parse_errors(self):
    """ignore_parse_errors() should set 'format.ignore-parse-errors' to 'true'."""
    descriptor = OldCsv().ignore_parse_errors()
    actual = descriptor.to_properties()
    assert actual == {
        'format.ignore-parse-errors': 'true',
        'format.type': 'csv',
        'format.property-version': '1',
    }
def test_quote_character(self):
    """quote_character('*') should surface as the 'format.quote-character' property."""
    descriptor = OldCsv().quote_character("*")
    actual = descriptor.to_properties()
    assert actual == {
        'format.quote-character': '*',
        'format.type': 'csv',
        'format.property-version': '1',
    }
def test_field_delimiter(self):
    """field_delimiter('|') should surface as the 'format.field-delimiter' property."""
    descriptor = OldCsv().field_delimiter("|")
    actual = descriptor.to_properties()
    assert actual == {
        'format.field-delimiter': '|',
        'format.type': 'csv',
        'format.property-version': '1',
    }
def test_line_delimiter(self):
    """line_delimiter(';') should surface as the 'format.line-delimiter' property."""
    descriptor = OldCsv().line_delimiter(";")
    actual = descriptor.to_properties()
    assert actual == {
        'format.type': 'csv',
        'format.property-version': '1',
        'format.line-delimiter': ';',
    }
def test_end_to_end(self):
    """End-to-end test: read a CSV file through an OldCsv-formatted source
    (exercising delimiter, quote, comment and skip-first-line options),
    project it with ``a + 1``, and write the result to a CSV sink.

    The source data deliberately contains a header line, a commented line
    and a malformed line; with ``ignore_first_line``/``comment_prefix``/
    ``ignore_parse_errors`` configured, only the two valid rows survive.
    """
    source_path = os.path.join(self.tempdir + '/streaming.csv')
    # Adjacent string literals replace the original '+'/backslash chain;
    # content is byte-identical.
    lines = ('a,b,c\n'
             '1,hi,hello\n'
             '#comments\n'
             'error line\n'
             '2,"hi,world!",hello\n')
    # Fix: the original called f.close() inside the with-block, which is
    # redundant — the context manager already closes the file on exit.
    with open(source_path, 'w') as f:
        f.write(lines)
    sink_path = os.path.join(self.tempdir + '/streaming2.csv')
    t_env = self.t_env
    # connect source
    t_env.connect(FileSystem().path(source_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .line_delimiter("\n")
                     .ignore_parse_errors()
                     .quote_character('"')
                     .comment_prefix("#")
                     .ignore_first_line()
                     .field("a", "INT")
                     .field("b", "VARCHAR")
                     .field("c", "VARCHAR"))\
        .with_schema(Schema()
                     .field("a", "INT")
                     .field("b", "VARCHAR")
                     .field("c", "VARCHAR"))\
        .in_append_mode()\
        .register_table_source("source")
    # connect sink
    t_env.connect(FileSystem().path(sink_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .with_schema(Schema()
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .register_table_sink("sink")
    t_env.scan("source") \
        .select("a + 1, b, c") \
        .insert_into("sink")
    t_env.execute()
    with open(sink_path, 'r') as f:
        lines = f.read()
    assert lines == '2,hi,hello\n' + '3,hi,world!,hello\n'
def test_schema(self):
    """schema(TableSchema) should expand each field into indexed
    'format.fields.N.name'/'format.fields.N.type' properties.

    Bug fix: the method was declared without ``self``, unlike every
    sibling test in this file; invoked as a bound instance method by the
    test runner it would raise TypeError. Adding ``self`` makes it a
    proper instance test method (the body does not otherwise use it).
    """
    csv = OldCsv()
    schema = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    csv = csv.schema(schema)
    properties = csv.to_properties()
    expected = {
        'format.fields.0.name': 'a',
        'format.fields.0.type': 'INT',
        'format.fields.1.name': 'b',
        # STRING is serialized under its legacy 'VARCHAR' type string.
        'format.fields.1.type': 'VARCHAR',
        'format.type': 'csv',
        'format.property-version': '1',
    }
    assert properties == expected
def test_register_table_source(self):
    """Register a connected OldCsv file source, project it with ``a + 1``,
    and verify the rows written to a CsvTableSink."""
    source_path = os.path.join(self.tempdir + '/streaming.csv')
    field_names = ["a", "b", "c"]
    field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
    rows = [(1, "Hi", "Hello"), (2, "Hello", "Hello")]
    self.prepare_csv_source(source_path, rows, field_types, field_names)
    t_env = self.t_env
    sink_path = os.path.join(self.tempdir + '/streaming2.csv')
    # Start from a clean sink file so the assertion sees only this run.
    if os.path.isfile(sink_path):
        os.remove(sink_path)
    t_env.register_table_sink(
        "sink",
        field_names, field_types, CsvTableSink(sink_path))
    # connect source
    t_env.connect(FileSystem().path(source_path))\
        .with_format(OldCsv()
                     .field_delimiter(',')
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .with_schema(Schema()
                     .field("a", DataTypes.INT())
                     .field("b", DataTypes.STRING())
                     .field("c", DataTypes.STRING()))\
        .register_table_source("source")
    t_env.scan("source") \
        .select("a + 1, b, c") \
        .insert_into("sink")
    t_env.execute()
    with open(sink_path, 'r') as f:
        written = f.read()
    assert written == '2,Hi,Hello\n' + '3,Hello,Hello\n'
def test_with_schema(self):
    """with_schema(Schema) should merge schema.* keys with the connector
    and format properties."""
    descriptor = self.t_env.connect(FileSystem()) \
        .with_format(OldCsv()) \
        .with_schema(Schema().field("a", "INT"))
    actual = descriptor.to_properties()
    assert actual == {
        'schema.0.name': 'a',
        'schema.0.type': 'INT',
        'format.type': 'csv',
        'format.property-version': '1',
        'connector.type': 'filesystem',
        'connector.property-version': '1',
    }
def test_field(self):
    """field() should accept both DataTypes constants and raw type strings,
    emitting indexed 'format.fields.N.*' properties in call order."""
    descriptor = OldCsv()
    descriptor.field("a", DataTypes.LONG)
    descriptor.field("b", DataTypes.STRING)
    descriptor.field("c", "SQL_TIMESTAMP")
    actual = descriptor.to_properties()
    expected = {
        'format.fields.0.name': 'a',
        'format.fields.0.type': 'BIGINT',
        'format.fields.1.name': 'b',
        'format.fields.1.type': 'VARCHAR',
        'format.fields.2.name': 'c',
        'format.fields.2.type': 'SQL_TIMESTAMP',
        'format.type': 'csv',
        'format.property-version': '1',
    }
    assert actual == expected
def test_in_upsert_mode(self):
    """in_upsert_mode() should add 'update-mode': 'upsert' to the properties."""
    descriptor = self.t_env.connect(FileSystem()) \
        .with_format(OldCsv()) \
        .in_upsert_mode()
    actual = descriptor.to_properties()
    assert actual == {
        'update-mode': 'upsert',
        'format.type': 'csv',
        'format.property-version': '1',
        'connector.property-version': '1',
        'connector.type': 'filesystem',
    }
def test_field(self):
    """field() should accept DataTypes constants and raw type strings,
    producing indexed 'format.fields.N.*' properties.

    NOTE(review): a method with this exact name also appears earlier in
    this chunk; if both live in the same class, the later definition
    silently shadows the earlier one and only one test runs — confirm
    against the full file and rename one of them if so.
    """
    csv_format = OldCsv()
    csv_format.field("a", DataTypes.LONG)
    csv_format.field("b", DataTypes.STRING)
    csv_format.field("c", "SQL_TIMESTAMP")
    actual = csv_format.to_properties()
    assert actual == {
        'format.fields.0.name': 'a',
        'format.fields.0.type': 'BIGINT',
        'format.fields.1.name': 'b',
        'format.fields.1.type': 'VARCHAR',
        'format.fields.2.name': 'c',
        'format.fields.2.type': 'SQL_TIMESTAMP',
        'format.type': 'csv',
        'format.property-version': '1',
    }