def test_stream_case(self):
    """Run the streaming shell example and check the CSV output it produces.

    Two rows are read from an in-memory table, column ``a`` is incremented
    by one, and the result is inserted into a filesystem sink using the
    ``csv`` format. The first entry listed in the sink directory is then
    read back and compared with the expected two lines.
    """
    from pyflink.shell import s_env, st_env, DataTypes
    from pyflink.table.schema import Schema
    from pyflink.table.table_descriptor import TableDescriptor, FormatDescriptor
    # example begin
    import tempfile
    import os
    import shutil

    sink_path = tempfile.gettempdir() + '/streaming.csv'
    # Remove leftovers from a previous run, whether a plain file or a directory.
    if os.path.isfile(sink_path):
        os.remove(sink_path)
    elif os.path.exists(sink_path):
        shutil.rmtree(sink_path)

    s_env.set_parallelism(1)
    t = st_env.from_elements(
        [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
        ['a', 'b', 'c'])

    # Describe the sink piece by piece: column layout, CSV format, connector.
    sink_schema = Schema.new_builder() \
        .column("a", DataTypes.BIGINT()) \
        .column("b", DataTypes.STRING()) \
        .column("c", DataTypes.STRING()) \
        .build()
    csv_format = FormatDescriptor.for_format("csv") \
        .option("field-delimiter", ",") \
        .build()
    sink_descriptor = TableDescriptor.for_connector("filesystem") \
        .schema(sink_schema) \
        .option("path", sink_path) \
        .format(csv_format) \
        .build()
    st_env.create_temporary_table("stream_sink", sink_descriptor)

    insert_result = t.select(t.a + 1, t.b, t.c).execute_insert("stream_sink")
    insert_result.wait()

    # verification code, do not copy this part to shell.py
    first_entry = os.listdir(sink_path)[0]
    with open(os.path.join(sink_path, first_entry), 'r') as f:
        lines = f.read()
    self.assertEqual(lines, '2,hi,hello\n' + '3,hi,hello\n')
def test_stream_case(self):
    """Run the streaming shell example against the descriptor-based CSV sink.

    Two rows are read from an in-memory table, ``a`` is incremented by one,
    and the result is inserted into a sink registered through
    ``connect(...).with_format(...).with_schema(...)``. The file at the sink
    path is then read back and compared with the expected two lines.
    """
    from pyflink.shell import s_env, st_env, FileSystem, OldCsv, DataTypes, Schema
    # example begin
    import tempfile
    import os
    import shutil

    sink_path = tempfile.gettempdir() + '/streaming.csv'
    # Remove leftovers from a previous run, whether a plain file or a directory.
    if os.path.isfile(sink_path):
        os.remove(sink_path)
    elif os.path.exists(sink_path):
        shutil.rmtree(sink_path)

    s_env.set_parallelism(1)
    t = st_env.from_elements(
        [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
        ['a', 'b', 'c'])

    # Register the sink step by step: connector, then format, then schema.
    connector = st_env.connect(FileSystem().path(sink_path))
    csv_format = OldCsv() \
        .field_delimiter(',') \
        .field("a", DataTypes.BIGINT()) \
        .field("b", DataTypes.STRING()) \
        .field("c", DataTypes.STRING())
    with_format = connector.with_format(csv_format)
    sink_schema = Schema() \
        .field("a", DataTypes.BIGINT()) \
        .field("b", DataTypes.STRING()) \
        .field("c", DataTypes.STRING())
    with_format.with_schema(sink_schema).create_temporary_table("stream_sink")

    insert_result = t.select("a + 1, b, c").execute_insert("stream_sink")
    insert_result.wait()

    # verification code, do not copy this part to shell.py
    with open(sink_path, 'r') as f:
        lines = f.read()
    self.assertEqual(lines, '2,hi,hello\n' + '3,hi,hello\n')
import os
import shutil

# NOTE(review): tempfile, s_env, st_env, lit, DataTypes, Schema,
# TableDescriptor and FormatDescriptor are not imported in this span --
# presumably they are brought into scope earlier in the file; confirm.

sink_path = tempfile.gettempdir() + '/batch.csv'
# Remove leftovers from a previous run, whether a plain file or a directory.
if os.path.isfile(sink_path):
    os.remove(sink_path)
elif os.path.exists(sink_path):
    shutil.rmtree(sink_path)

s_env.set_parallelism(1)
t = st_env.from_elements(
    [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
    ['a', 'b', 'c'])

# Describe the sink piece by piece: column layout, CSV format, connector.
sink_schema = Schema.new_builder() \
    .column("a", DataTypes.BIGINT()) \
    .column("b", DataTypes.STRING()) \
    .column("c", DataTypes.STRING()) \
    .build()
csv_format = FormatDescriptor.for_format("csv") \
    .option("field-delimiter", ",") \
    .build()
sink_descriptor = TableDescriptor.for_connector("filesystem") \
    .schema(sink_schema) \
    .option("path", sink_path) \
    .format(csv_format) \
    .build()
st_env.create_temporary_table("csv_sink", sink_descriptor)

# Increment column a by one and write all three columns to the sink.
t.select(t.a + lit(1), t.b, t.c).execute_insert("csv_sink").wait()

# Read back the first entry listed in the sink directory and verify it.
first_entry = os.listdir(sink_path)[0]
with open(os.path.join(sink_path, first_entry), 'r') as f:
    lines = f.read()

assert lines == '2,hi,hello\n' + '3,hi,hello\n'

print('pip_test_code.py success!')
import tempfile
import os
import shutil

# NOTE(review): b_env, bt_env, FileSystem, OldCsv, DataTypes and Schema are
# not imported in this span -- presumably they are brought into scope
# earlier in the file; confirm.

sink_path = tempfile.gettempdir() + '/batch.csv'
# Remove leftovers from a previous run, whether a plain file or a directory.
if os.path.isfile(sink_path):
    os.remove(sink_path)
elif os.path.exists(sink_path):
    shutil.rmtree(sink_path)

b_env.set_parallelism(1)
t = bt_env.from_elements(
    [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
    ['a', 'b', 'c'])

# Register the sink step by step: connector, then format, then schema.
connector = bt_env.connect(FileSystem().path(sink_path))
csv_format = OldCsv() \
    .field_delimiter(',') \
    .field("a", DataTypes.BIGINT()) \
    .field("b", DataTypes.STRING()) \
    .field("c", DataTypes.STRING())
with_format = connector.with_format(csv_format)
sink_schema = Schema() \
    .field("a", DataTypes.BIGINT()) \
    .field("b", DataTypes.STRING()) \
    .field("c", DataTypes.STRING())
with_format.with_schema(sink_schema).create_temporary_table("batch_sink")

# Submit the insert, then fetch the job's execution result
# (presumably this blocks until the job has finished -- confirm).
insert_result = t.select("a + 1, b, c").execute_insert("batch_sink")
insert_result.get_job_client().get_job_execution_result().result()

# Read back the file at the sink path and verify its contents.
with open(sink_path, 'r') as f:
    lines = f.read()

assert lines == '2,hi,hello\n' + '3,hi,hello\n'

print('pip_test_code.py success!')
import tempfile
import os
import shutil

# NOTE(review): s_env, st_env, DataTypes, Schema, TableDescriptor and
# FormatDescriptor are not imported in this span -- presumably they are
# brought into scope earlier in the file; confirm.

sink_path = tempfile.gettempdir() + '/batch.csv'
# Remove leftovers from a previous run, whether a plain file or a directory.
if os.path.isfile(sink_path):
    os.remove(sink_path)
elif os.path.exists(sink_path):
    shutil.rmtree(sink_path)

s_env.set_parallelism(1)
t = st_env.from_elements(
    [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
    ['a', 'b', 'c'])

# Register a filesystem sink that writes comma-delimited CSV with three
# columns (a BIGINT, b STRING, c STRING) under sink_path.
st_env.create_temporary_table(
    "csv_sink",
    TableDescriptor.for_connector("filesystem")
    .schema(Schema.new_builder()
            .column("a", DataTypes.BIGINT())
            .column("b", DataTypes.STRING())
            .column("c", DataTypes.STRING())
            .build())
    .option("path", sink_path)
    .format(FormatDescriptor.for_format("csv")
            .option("field-delimiter", ",")
            .build())
    .build())

# Select with column expressions rather than the deprecated string form
# "a + 1, b, c"; the projection itself (a + 1, b, c) is unchanged.
t.select(t.a + 1, t.b, t.c).execute_insert("csv_sink").wait()

# Read back the first entry listed in the sink directory and verify it.
with open(os.path.join(sink_path, os.listdir(sink_path)[0]), 'r') as f:
    lines = f.read()

assert lines == '2,hi,hello\n' + '3,hi,hello\n'