Esempio n. 1
0
    def test_stream_case(self):
        """Insert two rows into a CSV sink declared via ``connect()`` and check the file.

        The sink is registered as ``stream_sink`` using the ``FileSystem`` connector
        with an ``OldCsv`` format. Column ``a`` is incremented by one on the way
        out, so the file is expected to contain ``2,hi,hello`` and ``3,hi,hello``.
        """
        from pyflink.shell import s_env, st_env, FileSystem, OldCsv, DataTypes, Schema
        # example begin

        import tempfile
        import os
        import shutil
        sink_path = tempfile.gettempdir() + '/streaming.csv'
        # Clear whatever an earlier run left behind, be it a file or a directory.
        if os.path.isfile(sink_path):
            os.remove(sink_path)
        elif os.path.exists(sink_path):
            shutil.rmtree(sink_path)

        s_env.set_parallelism(1)
        rows = [(1, 'hi', 'hello'), (2, 'hi', 'hello')]
        source_table = st_env.from_elements(rows, ['a', 'b', 'c'])

        # Column layout shared by the CSV format and the table schema; the type
        # factories are called separately for each so no instance is shared.
        columns = (
            ("a", DataTypes.BIGINT),
            ("b", DataTypes.STRING),
            ("c", DataTypes.STRING),
        )

        csv_format = OldCsv().field_delimiter(',')
        for column_name, make_type in columns:
            csv_format = csv_format.field(column_name, make_type())

        sink_schema = Schema()
        for column_name, make_type in columns:
            sink_schema = sink_schema.field(column_name, make_type())

        sink_connector = FileSystem().path(sink_path)
        st_env.connect(sink_connector) \
            .with_format(csv_format) \
            .with_schema(sink_schema) \
            .create_temporary_table("stream_sink")

        # Block until the insert job has finished so the file is complete.
        insert_job = source_table.select("a + 1, b, c").execute_insert("stream_sink")
        insert_job.wait()

        # verify code, do not copy this code to shell.py
        with open(sink_path, 'r') as sink_file:
            written = sink_file.read()
        self.assertEqual(written, '2,hi,hello\n' + '3,hi,hello\n')
Esempio n. 2
0
    def test_stream_case(self):
        """Insert two rows into a ``filesystem`` CSV sink and check the written file.

        The sink is registered as ``stream_sink`` through a ``TableDescriptor``.
        Column ``a`` is incremented by one on the way out, so the output is
        expected to contain ``2,hi,hello`` and ``3,hi,hello``.
        """
        from pyflink.shell import s_env, st_env, DataTypes
        from pyflink.table.schema import Schema
        from pyflink.table.table_descriptor import TableDescriptor, FormatDescriptor
        # example begin

        import tempfile
        import os
        import shutil
        sink_path = tempfile.gettempdir() + '/streaming.csv'
        # Clear whatever an earlier run left behind, be it a file or a directory.
        if os.path.isfile(sink_path):
            os.remove(sink_path)
        elif os.path.exists(sink_path):
            shutil.rmtree(sink_path)

        s_env.set_parallelism(1)
        rows = [(1, 'hi', 'hello'), (2, 'hi', 'hello')]
        t = st_env.from_elements(rows, ['a', 'b', 'c'])

        # Assemble the sink description piece by piece: schema, format, descriptor.
        sink_schema = (Schema.new_builder()
                       .column("a", DataTypes.BIGINT())
                       .column("b", DataTypes.STRING())
                       .column("c", DataTypes.STRING())
                       .build())
        csv_format = (FormatDescriptor.for_format("csv")
                      .option("field-delimiter", ",")
                      .build())
        sink_descriptor = (TableDescriptor.for_connector("filesystem")
                           .schema(sink_schema)
                           .option("path", sink_path)
                           .format(csv_format)
                           .build())
        st_env.create_temporary_table("stream_sink", sink_descriptor)

        # Block until the insert job has finished so the output is complete.
        insert_job = t.select(t.a + 1, t.b, t.c).execute_insert("stream_sink")
        insert_job.wait()

        # verify code, do not copy this code to shell.py
        # sink_path is listed as a directory here; the first entry is read back.
        first_entry = os.listdir(sink_path)[0]
        with open(os.path.join(sink_path, first_entry), 'r') as sink_file:
            written = sink_file.read()
        self.assertEqual(written, '2,hi,hello\n' + '3,hi,hello\n')
Esempio n. 3
0
# Example script: write two rows to a "filesystem" CSV sink named "csv_sink"
# and read the produced output back into `lines`.
from pyflink.shell import s_env, st_env, DataTypes
# `lit` is used in the select() below; without this import the script fails
# with a NameError before the insert is ever submitted.
from pyflink.table.expressions import lit
from pyflink.table.schema import Schema
from pyflink.table.table_descriptor import TableDescriptor, FormatDescriptor

import tempfile
import os
import shutil

sink_path = tempfile.gettempdir() + '/batch.csv'
# Remove output left by a previous run, whether it is a file or a directory.
if os.path.exists(sink_path):
    if os.path.isfile(sink_path):
        os.remove(sink_path)
    else:
        shutil.rmtree(sink_path)
s_env.set_parallelism(1)
t = st_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')],
                         ['a', 'b', 'c'])

# Register the sink: three columns (a BIGINT, b STRING, c STRING), written as
# comma-delimited CSV under sink_path.
st_env.create_temporary_table(
    "csv_sink",
    TableDescriptor.for_connector("filesystem")
    .schema(Schema.new_builder()
            .column("a", DataTypes.BIGINT())
            .column("b", DataTypes.STRING())
            .column("c", DataTypes.STRING())
            .build())
    .option("path", sink_path)
    .format(FormatDescriptor.for_format("csv")
            .option("field-delimiter", ",")
            .build())
    .build())

# Increment column a by one and wait for the insert job to finish.
t.select(t.a + lit(1), t.b, t.c).execute_insert("csv_sink").wait()

# sink_path is listed as a directory here; read back the first entry found.
with open(os.path.join(sink_path, os.listdir(sink_path)[0]), 'r') as f:
    lines = f.read()