Example #1
    def test_path(self):
        file_system = FileSystem()

        file_system = file_system.path("/test.csv")

        properties = file_system.to_properties()
        expected = {'connector.property-version': '1',
                    'connector.type': 'filesystem',
                    'connector.path': '/test.csv'}
        self.assertEqual(expected, properties)
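The FileSystem descriptor only builds a property map; the following minimal sketch (the path '/data/words.csv' is hypothetical, and a local PyFlink installation with the legacy descriptor API is assumed) shows how that property map can be inspected directly:

from pyflink.table.descriptors import FileSystem

descriptor = FileSystem().path('/data/words.csv')  # hypothetical path
print(descriptor.to_properties())
# {'connector.property-version': '1', 'connector.type': 'filesystem',
#  'connector.path': '/data/words.csv'}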
Example #2
# @Site :
# @File : TalbeAPI.py
# @Software: PyCharm

from pyflink.dataset import ExecutionEnvironment
from pyflink.table import TableConfig, DataTypes, BatchTableEnvironment
from pyflink.table.descriptors import Schema, OldCsv, FileSystem

exec_env = ExecutionEnvironment.get_execution_environment()
exec_env.set_parallelism(3)
t_config = TableConfig()
t_env = BatchTableEnvironment.create(exec_env, t_config)

# Register '/tmp/input' as a CSV-backed source table with a single 'word' column
t_env.connect(FileSystem().path('/tmp/input')) \
    .with_format(OldCsv()
                 .field('word', DataTypes.STRING())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())) \
    .create_temporary_table('mySource')

# Register '/tmp/output' as a tab-delimited CSV sink with 'word' and 'count' columns
t_env.connect(FileSystem().path('/tmp/output')) \
    .with_format(OldCsv()
                 .field_delimiter('\t')
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .create_temporary_table('mySink')

# Word count: group by 'word', count occurrences, and write the result to the sink
t_env.from_path('mySource') \
    .group_by('word') \
    .select('word, count(1)') \
    .insert_into('mySink')
t_env.execute('tutorial_job')
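Example #2 expects the source file to exist before the job runs. A minimal sketch for trying it end to end (assumes local write access to /tmp; the sample words are hypothetical):

# Create some input for the word-count job above before calling execute().
with open('/tmp/input', 'w') as f:
    f.write('flink\npyflink\nflink\n')

# After t_env.execute('tutorial_job') finishes, the sink at /tmp/output holds
# tab-delimited word/count pairs (possibly split across part files), e.g.
# 'flink\t2' and 'pyflink\t1'.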