def test_field(self):
    """Verify OldCsv.field records each column's name and resolved data type.

    Covers a DataType instance (BIGINT), a type that maps to a
    parameterized form (STRING -> VARCHAR(2147483647)), and a legacy
    type-string ("SQL_TIMESTAMP" -> TIMESTAMP(3)).
    """
    descriptor = OldCsv()
    descriptor.field("a", DataTypes.BIGINT())
    descriptor.field("b", DataTypes.STRING())
    descriptor.field("c", "SQL_TIMESTAMP")

    expected = {
        'format.fields.0.name': 'a',
        'format.fields.0.data-type': 'BIGINT',
        'format.fields.1.name': 'b',
        'format.fields.1.data-type': 'VARCHAR(2147483647)',
        'format.fields.2.name': 'c',
        'format.fields.2.data-type': 'TIMESTAMP(3)',
        'format.type': 'csv',
        'format.property-version': '1',
    }
    self.assertEqual(expected, descriptor.to_properties())
# @Author  : mh
# @Site    :
# @File    : TalbeAPI.py
# @Software: PyCharm
#
# Word-count tutorial: read words from /tmp/input, group and count them,
# write (word, count) tab-separated rows to /tmp/output.
from pyflink.dataset import ExecutionEnvironment
from pyflink.table import TableConfig, DataTypes, BatchTableEnvironment
from pyflink.table.descriptors import Schema, OldCsv, FileSystem

exec_env = ExecutionEnvironment.get_execution_environment()
exec_env.set_parallelism(3)
t_config = TableConfig()
t_evn = BatchTableEnvironment.create(exec_env, t_config)

# Source: one STRING column 'word' per CSV line.
# NOTE: descriptors must be *instances* — OldCsv()/Schema(), not the class —
# and field(name, type) takes both arguments in one call; the original code
# called .field on the class and passed DataTypes.STRING() to with_format,
# which raises a TypeError.
t_evn.connect(FileSystem().path('/tmp/input')) \
    .with_format(OldCsv()
                 .field('word', DataTypes.STRING())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())) \
    .create_temporary_table('mySource')

# Sink: tab-delimited (word, count) rows. FileSystem() must be instantiated
# before .path() (the original called it on the class).
t_evn.connect(FileSystem().path('/tmp/output')) \
    .with_format(OldCsv()
                 .field_delimiter('\t')
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .create_temporary_table('mySink')

# Count occurrences of each word and emit into the sink.
t_evn.from_path('mySource').group_by('word').select(
    'word,count(1)').insert_into('mySink')
t_evn.execute('tutorial_job')