Ejemplo n.º 1
0
    def test_rowtime(self):
        schema = Schema()

        schema = schema\
            .field("int_field", DataTypes.INT())\
            .field("long_field", DataTypes.BIGINT())\
            .field("rtime", DataTypes.BIGINT())\
            .rowtime(
                Rowtime().timestamps_from_field("long_field").watermarks_periodic_bounded(5000))\
            .field("string_field", DataTypes.STRING())

        properties = schema.to_properties()
        print(properties)
        expected = {'schema.0.name': 'int_field',
                    'schema.0.type': 'INT',
                    'schema.1.name': 'long_field',
                    'schema.1.type': 'BIGINT',
                    'schema.2.name': 'rtime',
                    'schema.2.type': 'BIGINT',
                    'schema.2.rowtime.timestamps.type': 'from-field',
                    'schema.2.rowtime.timestamps.from': 'long_field',
                    'schema.2.rowtime.watermarks.type': 'periodic-bounded',
                    'schema.2.rowtime.watermarks.delay': '5000',
                    'schema.3.name': 'string_field',
                    'schema.3.type': 'VARCHAR'}
        assert properties == expected
Ejemplo n.º 2
0
    def test_schema():
        schema = Schema()
        table_schema = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])

        schema = schema.schema(table_schema)

        properties = schema.to_properties()
        expected = {'schema.0.name': 'a',
                    'schema.0.type': 'INT',
                    'schema.1.name': 'b',
                    'schema.1.type': 'VARCHAR'}
        assert properties == expected
Ejemplo n.º 3
0
    def test_end_to_end(self):
        source_path = os.path.join(self.tempdir + '/streaming.csv')
        with open(source_path, 'w') as f:
            lines = 'a,b,c\n' + \
                    '1,hi,hello\n' + \
                    '#comments\n' + \
                    "error line\n" + \
                    '2,"hi,world!",hello\n'
            f.write(lines)
            f.close()
        sink_path = os.path.join(self.tempdir + '/streaming2.csv')
        t_env = self.t_env
        # connect source
        t_env.connect(FileSystem().path(source_path))\
             .with_format(OldCsv()
                          .field_delimiter(',')
                          .line_delimiter("\n")
                          .ignore_parse_errors()
                          .quote_character('"')
                          .comment_prefix("#")
                          .ignore_first_line()
                          .field("a", "INT")
                          .field("b", "VARCHAR")
                          .field("c", "VARCHAR"))\
             .with_schema(Schema()
                          .field("a", "INT")
                          .field("b", "VARCHAR")
                          .field("c", "VARCHAR"))\
             .in_append_mode()\
             .register_table_source("source")
        # connect sink
        t_env.connect(FileSystem().path(sink_path))\
             .with_format(OldCsv()
                          .field_delimiter(',')
                          .field("a", DataTypes.INT())
                          .field("b", DataTypes.STRING())
                          .field("c", DataTypes.STRING()))\
             .with_schema(Schema()
                          .field("a", DataTypes.INT())
                          .field("b", DataTypes.STRING())
                          .field("c", DataTypes.STRING()))\
             .register_table_sink("sink")

        t_env.scan("source") \
             .select("a + 1, b, c") \
             .insert_into("sink")
        t_env.execute()

        with open(sink_path, 'r') as f:
            lines = f.read()
            assert lines == '2,hi,hello\n' + '3,hi,world!,hello\n'
Ejemplo n.º 4
0
    def test_schema(self):
        table_schema = TableSchema(
            ["a", "b"], [DataTypes.INT(), DataTypes.STRING()])

        schema = Schema().schema(table_schema)

        properties = schema.to_properties()
        expected = {
            'schema.0.name': 'a',
            'schema.0.type': 'INT',
            'schema.1.name': 'b',
            'schema.1.type': 'VARCHAR'
        }
        self.assertEqual(expected, properties)
Ejemplo n.º 5
0
    def test_register_table_source(self):
        source_path = os.path.join(self.tempdir + '/streaming.csv')
        field_names = ["a", "b", "c"]
        field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
        data = [(1, "Hi", "Hello"), (2, "Hello", "Hello")]
        self.prepare_csv_source(source_path, data, field_types, field_names)
        t_env = self.t_env
        sink_path = os.path.join(self.tempdir + '/streaming2.csv')
        if os.path.isfile(sink_path):
            os.remove(sink_path)
        t_env.register_table_sink(
            "sink",
            field_names, field_types, CsvTableSink(sink_path))

        # connect source
        t_env.connect(FileSystem().path(source_path))\
             .with_format(OldCsv()
                          .field_delimiter(',')
                          .field("a", DataTypes.INT())
                          .field("b", DataTypes.STRING())
                          .field("c", DataTypes.STRING()))\
             .with_schema(Schema()
                          .field("a", DataTypes.INT())
                          .field("b", DataTypes.STRING())
                          .field("c", DataTypes.STRING()))\
             .register_table_source("source")
        t_env.scan("source") \
             .select("a + 1, b, c") \
             .insert_into("sink")
        t_env.execute()

        with open(sink_path, 'r') as f:
            lines = f.read()
            assert lines == '2,Hi,Hello\n' + '3,Hello,Hello\n'
Ejemplo n.º 6
0
    def test_proctime(self):
        schema = Schema()

        schema = schema\
            .field("int_field", DataTypes.INT())\
            .field("ptime", DataTypes.BIGINT()).proctime()\
            .field("string_field", DataTypes.STRING())

        properties = schema.to_properties()
        expected = {'schema.0.name': 'int_field',
                    'schema.0.type': 'INT',
                    'schema.1.name': 'ptime',
                    'schema.1.type': 'BIGINT',
                    'schema.1.proctime': 'true',
                    'schema.2.name': 'string_field',
                    'schema.2.type': 'VARCHAR'}
        assert properties == expected
Ejemplo n.º 7
0
    def test_from_origin_field(self):
        schema = Schema()

        schema = schema\
            .field("int_field", DataTypes.INT())\
            .field("long_field", DataTypes.BIGINT()).from_origin_field("origin_field_a")\
            .field("string_field", DataTypes.STRING())

        properties = schema.to_properties()
        expected = {'schema.0.name': 'int_field',
                    'schema.0.type': 'INT',
                    'schema.1.name': 'long_field',
                    'schema.1.type': 'BIGINT',
                    'schema.1.from': 'origin_field_a',
                    'schema.2.name': 'string_field',
                    'schema.2.type': 'VARCHAR'}
        assert properties == expected
Ejemplo n.º 8
0
    def test_field_in_string(self):
        schema = Schema()

        schema = schema\
            .field("int_field", 'INT')\
            .field("long_field", 'BIGINT')\
            .field("string_field", 'VARCHAR')\
            .field("timestamp_field", 'SQL_TIMESTAMP')\
            .field("time_field", 'SQL_TIME')\
            .field("date_field", 'SQL_DATE')\
            .field("double_field", 'DOUBLE')\
            .field("float_field", 'FLOAT')\
            .field("byte_field", 'TINYINT')\
            .field("short_field", 'SMALLINT')\
            .field("boolean_field", 'BOOLEAN')

        properties = schema.to_properties()
        expected = {
            'schema.0.name': 'int_field',
            'schema.0.type': 'INT',
            'schema.1.name': 'long_field',
            'schema.1.type': 'BIGINT',
            'schema.2.name': 'string_field',
            'schema.2.type': 'VARCHAR',
            'schema.3.name': 'timestamp_field',
            'schema.3.type': 'SQL_TIMESTAMP',
            'schema.4.name': 'time_field',
            'schema.4.type': 'SQL_TIME',
            'schema.5.name': 'date_field',
            'schema.5.type': 'SQL_DATE',
            'schema.6.name': 'double_field',
            'schema.6.type': 'DOUBLE',
            'schema.7.name': 'float_field',
            'schema.7.type': 'FLOAT',
            'schema.8.name': 'byte_field',
            'schema.8.type': 'TINYINT',
            'schema.9.name': 'short_field',
            'schema.9.type': 'SMALLINT',
            'schema.10.name': 'boolean_field',
            'schema.10.type': 'BOOLEAN'
        }
        assert properties == expected
Ejemplo n.º 9
0
    def test_field(self):
        schema = Schema()

        schema = schema\
            .field("int_field", DataTypes.INT)\
            .field("long_field", DataTypes.LONG)\
            .field("string_field", DataTypes.STRING)\
            .field("timestamp_field", DataTypes.TIMESTAMP)\
            .field("time_field", DataTypes.TIME)\
            .field("date_field", DataTypes.DATE)\
            .field("double_field", DataTypes.DOUBLE)\
            .field("float_field", DataTypes.FLOAT)\
            .field("byte_field", DataTypes.BYTE)\
            .field("short_field", DataTypes.SHORT)\
            .field("boolean_field", DataTypes.BOOLEAN)

        properties = schema.to_properties()
        expected = {
            'schema.0.name': 'int_field',
            'schema.0.type': 'INT',
            'schema.1.name': 'long_field',
            'schema.1.type': 'BIGINT',
            'schema.2.name': 'string_field',
            'schema.2.type': 'VARCHAR',
            'schema.3.name': 'timestamp_field',
            'schema.3.type': 'TIMESTAMP',
            'schema.4.name': 'time_field',
            'schema.4.type': 'TIME',
            'schema.5.name': 'date_field',
            'schema.5.type': 'DATE',
            'schema.6.name': 'double_field',
            'schema.6.type': 'DOUBLE',
            'schema.7.name': 'float_field',
            'schema.7.type': 'FLOAT',
            'schema.8.name': 'byte_field',
            'schema.8.type': 'TINYINT',
            'schema.9.name': 'short_field',
            'schema.9.type': 'SMALLINT',
            'schema.10.name': 'boolean_field',
            'schema.10.type': 'BOOLEAN'
        }
        assert properties == expected
Ejemplo n.º 10
0
    def test_field_in_string(self):
        schema = Schema()

        schema = schema\
            .field("int_field", 'INT')\
            .field("long_field", 'BIGINT')\
            .field("string_field", 'VARCHAR')\
            .field("timestamp_field", 'SQL_TIMESTAMP')\
            .field("time_field", 'SQL_TIME')\
            .field("date_field", 'SQL_DATE')\
            .field("double_field", 'DOUBLE')\
            .field("float_field", 'FLOAT')\
            .field("byte_field", 'TINYINT')\
            .field("short_field", 'SMALLINT')\
            .field("boolean_field", 'BOOLEAN')

        properties = schema.to_properties()
        expected = {'schema.0.name': 'int_field',
                    'schema.0.type': 'INT',
                    'schema.1.name': 'long_field',
                    'schema.1.type': 'BIGINT',
                    'schema.2.name': 'string_field',
                    'schema.2.type': 'VARCHAR',
                    'schema.3.name': 'timestamp_field',
                    'schema.3.type': 'SQL_TIMESTAMP',
                    'schema.4.name': 'time_field',
                    'schema.4.type': 'SQL_TIME',
                    'schema.5.name': 'date_field',
                    'schema.5.type': 'SQL_DATE',
                    'schema.6.name': 'double_field',
                    'schema.6.type': 'DOUBLE',
                    'schema.7.name': 'float_field',
                    'schema.7.type': 'FLOAT',
                    'schema.8.name': 'byte_field',
                    'schema.8.type': 'TINYINT',
                    'schema.9.name': 'short_field',
                    'schema.9.type': 'SMALLINT',
                    'schema.10.name': 'boolean_field',
                    'schema.10.type': 'BOOLEAN'}
        assert properties == expected
Ejemplo n.º 11
0
    def test_field(self):
        schema = Schema()

        schema = schema\
            .field("int_field", DataTypes.INT())\
            .field("long_field", DataTypes.BIGINT())\
            .field("string_field", DataTypes.STRING())\
            .field("timestamp_field", DataTypes.TIMESTAMP())\
            .field("time_field", DataTypes.TIME())\
            .field("date_field", DataTypes.DATE())\
            .field("double_field", DataTypes.DOUBLE())\
            .field("float_field", DataTypes.FLOAT())\
            .field("byte_field", DataTypes.TINYINT())\
            .field("short_field", DataTypes.SMALLINT())\
            .field("boolean_field", DataTypes.BOOLEAN())

        properties = schema.to_properties()
        expected = {'schema.0.name': 'int_field',
                    'schema.0.type': 'INT',
                    'schema.1.name': 'long_field',
                    'schema.1.type': 'BIGINT',
                    'schema.2.name': 'string_field',
                    'schema.2.type': 'VARCHAR',
                    'schema.3.name': 'timestamp_field',
                    'schema.3.type': 'TIMESTAMP',
                    'schema.4.name': 'time_field',
                    'schema.4.type': 'TIME',
                    'schema.5.name': 'date_field',
                    'schema.5.type': 'DATE',
                    'schema.6.name': 'double_field',
                    'schema.6.type': 'DOUBLE',
                    'schema.7.name': 'float_field',
                    'schema.7.type': 'FLOAT',
                    'schema.8.name': 'byte_field',
                    'schema.8.type': 'TINYINT',
                    'schema.9.name': 'short_field',
                    'schema.9.type': 'SMALLINT',
                    'schema.10.name': 'boolean_field',
                    'schema.10.type': 'BOOLEAN'}
        assert properties == expected
Ejemplo n.º 12
0
    def test_with_schema(self):
        descriptor = self.t_env.connect(FileSystem())

        descriptor = descriptor.with_format(OldCsv()).with_schema(Schema().field("a", "INT"))

        properties = descriptor.to_properties()
        expected = {'schema.0.name': 'a',
                    'schema.0.type': 'INT',
                    'format.type': 'csv',
                    'format.property-version': '1',
                    'connector.type': 'filesystem',
                    'connector.property-version': '1'}
        assert properties == expected