コード例 #1
0
ファイル: test_descriptor.py プロジェクト: zhaoawd/flink
    def test_watermarks_periodic_bounded(self):
        rowtime = Rowtime().watermarks_periodic_bounded(1000)

        properties = rowtime.to_properties()
        expected = {'rowtime.watermarks.type': 'periodic-bounded',
                    'rowtime.watermarks.delay': '1000'}
        self.assertEqual(expected, properties)
コード例 #2
0
ファイル: test_descriptor.py プロジェクト: zyset/flink-1
    def test_timestamps_from_field(self):
        rowtime = Rowtime().timestamps_from_field("rtime")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.timestamps.type': 'from-field',
            'rowtime.timestamps.from': 'rtime'
        }
        self.assertEqual(expected, properties)
コード例 #3
0
ファイル: test_descriptor.py プロジェクト: zhaoawd/flink
    def test_watermarks_from_strategy(self):
        rowtime = Rowtime().watermarks_from_strategy(
            "org.apache.flink.table.legacyutils.CustomAssigner")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.watermarks.type': 'custom',
            'rowtime.watermarks.class':
                'org.apache.flink.table.legacyutils.CustomAssigner',
            'rowtime.watermarks.serialized':
                'rO0ABXNyADFvcmcuYXBhY2hlLmZsaW5rLnRhYmxlLmxlZ2FjeXV0aWxzLkN1c3RvbUFzc2lnbmVyu_8'
                'TLNBQBsACAAB4cgBHb3JnLmFwYWNoZS5mbGluay50YWJsZS5zb3VyY2VzLndtc3RyYXRlZ2llcy5QdW'
                '5jdHVhdGVkV2F0ZXJtYXJrQXNzaWduZXKBUc57oaWu9AIAAHhyAD1vcmcuYXBhY2hlLmZsaW5rLnRhY'
                'mxlLnNvdXJjZXMud21zdHJhdGVnaWVzLldhdGVybWFya1N0cmF0ZWd53nt-g2OWaT4CAAB4cA'}
        self.assertEqual(expected, properties)
コード例 #4
0
ファイル: test_descriptor.py プロジェクト: Temitope-A/cog
    def test_timestamps_from_extractor(self):
        rowtime = Rowtime().timestamps_from_extractor(
            "org.apache.flink.table.descriptors.RowtimeTest$CustomExtractor")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.timestamps.type': 'custom',
            'rowtime.timestamps.class':
                'org.apache.flink.table.descriptors.RowtimeTest$CustomExtractor',
            'rowtime.timestamps.serialized':
                'rO0ABXNyAD5vcmcuYXBhY2hlLmZsaW5rLnRhYmxlLmRlc2NyaXB0b3JzLlJvd3RpbWVUZXN0JEN1c3R'
                'vbUV4dHJhY3RvcoaChjMg55xwAgABTAAFZmllbGR0ABJMamF2YS9sYW5nL1N0cmluZzt4cgA-b3JnLm'
                'FwYWNoZS5mbGluay50YWJsZS5zb3VyY2VzLnRzZXh0cmFjdG9ycy5UaW1lc3RhbXBFeHRyYWN0b3Jf1'
                'Y6piFNsGAIAAHhwdAACdHM'}
        self.assertEqual(expected, properties)
コード例 #5
0
ファイル: test_descriptor.py プロジェクト: zhaoawd/flink
    def test_timestamps_from_extractor(self):
        rowtime = Rowtime().timestamps_from_extractor(
            "org.apache.flink.table.legacyutils.CustomExtractor")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.timestamps.type': 'custom',
            'rowtime.timestamps.class':
                'org.apache.flink.table.legacyutils.CustomExtractor',
            'rowtime.timestamps.serialized':
                'rO0ABXNyADJvcmcuYXBhY2hlLmZsaW5rLnRhYmxlLmxlZ2FjeXV0aWxzLkN1c3RvbUV4dHJhY3Rvctj'
                'ZLTGK9XvxAgABTAAFZmllbGR0ABJMamF2YS9sYW5nL1N0cmluZzt4cgA-b3JnLmFwYWNoZS5mbGluay'
                '50YWJsZS5zb3VyY2VzLnRzZXh0cmFjdG9ycy5UaW1lc3RhbXBFeHRyYWN0b3Jf1Y6piFNsGAIAAHhwd'
                'AACdHM'}
        self.assertEqual(expected, properties)
コード例 #6
0
ファイル: test_descriptor.py プロジェクト: zyset/flink-1
    def test_rowtime(self):
        schema = Schema()\
            .field("int_field", DataTypes.INT())\
            .field("long_field", DataTypes.BIGINT())\
            .field("rtime", DataTypes.BIGINT())\
            .rowtime(
                Rowtime().timestamps_from_field("long_field").watermarks_periodic_bounded(5000))\
            .field("string_field", DataTypes.STRING())

        properties = schema.to_properties()
        print(properties)
        expected = {
            'schema.0.name': 'int_field',
            'schema.0.data-type': 'INT',
            'schema.1.name': 'long_field',
            'schema.1.data-type': 'BIGINT',
            'schema.2.name': 'rtime',
            'schema.2.data-type': 'BIGINT',
            'schema.2.rowtime.timestamps.type': 'from-field',
            'schema.2.rowtime.timestamps.from': 'long_field',
            'schema.2.rowtime.watermarks.type': 'periodic-bounded',
            'schema.2.rowtime.watermarks.delay': '5000',
            'schema.3.name': 'string_field',
            'schema.3.data-type': 'VARCHAR(2147483647)'
        }
        self.assertEqual(expected, properties)
コード例 #7
0
def register_rides_source(st_env):
    st_env \
        .connect(  # declare the external system to connect to
        Kafka()
            .version("0.11")
            .topic("Rides")
            .start_from_earliest()
            .property("zookeeper.connect", "zookeeper:2181")
            .property("bootstrap.servers", "kafka:9092")) \
        .with_format(  # declare a format for this system
        Json()
            .fail_on_missing_field(True)
            .schema(DataTypes.ROW([
            DataTypes.FIELD("rideId", DataTypes.BIGINT()),
            DataTypes.FIELD("isStart", DataTypes.BOOLEAN()),
            DataTypes.FIELD("eventTime", DataTypes.TIMESTAMP()),
            DataTypes.FIELD("lon", DataTypes.FLOAT()),
            DataTypes.FIELD("lat", DataTypes.FLOAT()),
            DataTypes.FIELD("psgCnt", DataTypes.INT()),
            DataTypes.FIELD("taxiId", DataTypes.BIGINT())]))) \
        .with_schema(  # declare the schema of the table
        Schema()
            .field("rideId", DataTypes.BIGINT())
            .field("taxiId", DataTypes.BIGINT())
            .field("isStart", DataTypes.BOOLEAN())
            .field("lon", DataTypes.FLOAT())
            .field("lat", DataTypes.FLOAT())
            .field("psgCnt", DataTypes.INT())
            .field("rideTime", DataTypes.TIMESTAMP())
            .rowtime(
            Rowtime()
                .timestamps_from_field("eventTime")
                .watermarks_periodic_bounded(60000))) \
        .in_append_mode() \
        .register_table_source("source")
コード例 #8
0
ファイル: test_descriptor.py プロジェクト: Temitope-A/cog
    def test_watermarks_from_strategy(self):
        rowtime = Rowtime().watermarks_from_strategy(
            "org.apache.flink.table.descriptors.RowtimeTest$CustomAssigner")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.watermarks.type': 'custom',
            'rowtime.watermarks.class':
                'org.apache.flink.table.descriptors.RowtimeTest$CustomAssigner',
            'rowtime.watermarks.serialized':
                'rO0ABXNyAD1vcmcuYXBhY2hlLmZsaW5rLnRhYmxlLmRlc2NyaXB0b3JzLlJvd3RpbWVUZXN0JEN1c3R'
                'vbUFzc2lnbmVyeDcuDvfbu0kCAAB4cgBHb3JnLmFwYWNoZS5mbGluay50YWJsZS5zb3VyY2VzLndtc3'
                'RyYXRlZ2llcy5QdW5jdHVhdGVkV2F0ZXJtYXJrQXNzaWduZXKBUc57oaWu9AIAAHhyAD1vcmcuYXBhY'
                '2hlLmZsaW5rLnRhYmxlLnNvdXJjZXMud21zdHJhdGVnaWVzLldhdGVybWFya1N0cmF0ZWd53nt-g2OW'
                'aT4CAAB4cA'}
        self.assertEqual(expected, properties)
コード例 #9
0
ファイル: test.py プロジェクト: SISSeC/DETECT
def register_transactions_source(st_env):
    st_env.connect(Kafka()
                   .version("universal")
                   .topic("transactions-data")
                   .start_from_latest()
                   .property("zookeeper.connect", "host.docker.internal:2181")
                   .property("bootstrap.servers", "host.docker.internal:19091")) \
        .with_format(Json()
        .fail_on_missing_field(True)
        .schema(DataTypes.ROW([
        DataTypes.FIELD("customer", DataTypes.STRING()),
        DataTypes.FIELD("transaction_type", DataTypes.STRING()),
        DataTypes.FIELD("online_payment_amount", DataTypes.DOUBLE()),
        DataTypes.FIELD("in_store_payment_amount", DataTypes.DOUBLE()),
        DataTypes.FIELD("lat", DataTypes.DOUBLE()),
        DataTypes.FIELD("lon", DataTypes.DOUBLE()),
        DataTypes.FIELD("transaction_datetime", DataTypes.TIMESTAMP())]))) \
        .with_schema(Schema()
        .field("customer", DataTypes.STRING())
        .field("transaction_type", DataTypes.STRING())
        .field("online_payment_amount", DataTypes.DOUBLE())
        .field("in_store_payment_amount", DataTypes.DOUBLE())
        .field("lat", DataTypes.DOUBLE())
        .field("lon", DataTypes.DOUBLE())
        .field("rowtime", DataTypes.TIMESTAMP())
        .rowtime(
        Rowtime()
            .timestamps_from_field("transaction_datetime")
            .watermarks_periodic_bounded(60000))) \
        .in_append_mode() \
        .register_table_source("source")
コード例 #10
0
ファイル: test_descriptor.py プロジェクト: zjureel/flink
    def test_watermarks_from_strategy(self):
        rowtime = Rowtime().watermarks_from_strategy(
            "org.apache.flink.table.utils.TestingDescriptors$CustomAssigner")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.watermarks.type':
            'custom',
            'rowtime.watermarks.class':
            'org.apache.flink.table.utils.TestingDescriptors$CustomAssigner',
            'rowtime.watermarks.serialized':
            'rO0ABXNyAD5vcmcuYXBhY2hlLmZsaW5rLnRhYmxlLnV0aWxzLlRlc3RpbmdEZXNjcmlwdG9ycyRDdXN0b2'
            '1Bc3NpZ25lcsY_Xt96bBjDAgAAeHIAR29yZy5hcGFjaGUuZmxpbmsudGFibGUuc291cmNlcy53bXN0cmF0'
            'ZWdpZXMuUHVuY3R1YXRlZFdhdGVybWFya0Fzc2lnbmVygVHOe6GlrvQCAAB4cgA9b3JnLmFwYWNoZS5mbG'
            'luay50YWJsZS5zb3VyY2VzLndtc3RyYXRlZ2llcy5XYXRlcm1hcmtTdHJhdGVned57foNjlmk-AgAAeHA'
        }
        self.assertEqual(expected, properties)
コード例 #11
0
ファイル: test_descriptor.py プロジェクト: zjureel/flink
    def test_timestamps_from_extractor(self):
        rowtime = Rowtime().timestamps_from_extractor(
            "org.apache.flink.table.utils.TestingDescriptors$CustomExtractor")

        properties = rowtime.to_properties()
        expected = {
            'rowtime.timestamps.type':
            'custom',
            'rowtime.timestamps.class':
            'org.apache.flink.table.utils.TestingDescriptors$CustomExtractor',
            'rowtime.timestamps.serialized':
            'rO0ABXNyAD9vcmcuYXBhY2hlLmZsaW5rLnRhYmxlLnV0aWxzLlRlc3RpbmdEZXNjcmlwdG9ycyRDdXN0b2'
            '1FeHRyYWN0b3K-MntVKO8Z7QIAAUwABWZpZWxkdAASTGphdmEvbGFuZy9TdHJpbmc7eHIAPm9yZy5hcGFj'
            'aGUuZmxpbmsudGFibGUuc291cmNlcy50c2V4dHJhY3RvcnMuVGltZXN0YW1wRXh0cmFjdG9yX9WOqYhTbB'
            'gCAAB4cHQAAnRz'
        }
        self.assertEqual(expected, properties)
コード例 #12
0
ファイル: distinct_agg.py プロジェクト: toby1991/pyflink-demo
def distinct_agg_streaming():
    s_env = StreamExecutionEnvironment.get_execution_environment()
    s_env.set_parallelism(1)
    s_env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
    # use blink table planner
    st_env = StreamTableEnvironment.create(
        s_env,
        environment_settings=EnvironmentSettings.new_instance(
        ).in_streaming_mode().use_blink_planner().build())
    # use flink table planner
    # st_env = StreamTableEnvironment.create(s_env)
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("user")
            .start_from_earliest()
            .property("zookeeper.connect", "localhost:2181")
            .property("bootstrap.servers", "localhost:9092")
        ) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .json_schema(
                "{"
                "  type: 'object',"
                "  properties: {"
                "    a: {"
                "      type: 'string'"
                "    },"
                "    b: {"
                "      type: 'string'"
                "    },"
                "    c: {"
                "      type: 'string'"
                "    },"
                "    time: {"
                "      type: 'string',"
                "      format: 'date-time'"
                "    }"
                "  }"
                "}"
             )
         ) \
        .with_schema(  # declare the schema of the table
             Schema()
             .field("rowtime", DataTypes.TIMESTAMP())
             .rowtime(
                Rowtime()
                .timestamps_from_field("time")
                .watermarks_periodic_bounded(60000))
             .field("a", DataTypes.STRING())
             .field("b", DataTypes.STRING())
             .field("c", DataTypes.STRING())
         ) \
        .in_append_mode() \
        .register_table_source("Orders")
    st_env.connect(
        Elasticsearch()
        .version("6")
        .host("localhost", 9200, "http")
        .index("distinct_agg_streaming")
        .document_type('pyflink')
        .key_delimiter("_")
        .key_null_literal("null")
        .failure_handler_ignore()
        .disable_flush_on_checkpoint()
        .bulk_flush_max_actions(2)
        .bulk_flush_max_size("1 mb")
        .bulk_flush_interval(5000)
        ) \
        .with_schema(
            Schema()
            .field("a", DataTypes.STRING())
            .field("b", DataTypes.STRING())
        ) \
        .with_format(
           Json()
           .derive_schema()
        ) \
        .in_upsert_mode() \
        .register_table_sink("result")
    orders = st_env.scan("Orders")
    result = orders.window(Tumble.over("30.minutes").on("rowtime").alias("w")) \
        .group_by("a, w").select("a, b.max.distinct as d")
    result.insert_into("result")
    st_env.execute("distinct agg streaming")
コード例 #13
0
def tumble_time_window_streaming():
    s_env = StreamExecutionEnvironment.get_execution_environment()
    s_env.set_parallelism(1)
    s_env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
    st_env = StreamTableEnvironment.create(s_env)
    result_file = "/tmp/tumble_time_window_streaming.csv"
    if os.path.exists(result_file):
        os.remove(result_file)
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("user")
            .start_from_earliest()
            .property("zookeeper.connect", "localhost:2181")
            .property("bootstrap.servers", "localhost:9092")
        ) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .json_schema(
                "{"
                "  type: 'object',"
                "  properties: {"
                "    a: {"
                "      type: 'string'"
                "    },"
                "    b: {"
                "      type: 'string'"
                "    },"
                "    c: {"
                "      type: 'string'"
                "    },"
                "    time: {"
                "      type: 'string',"
                "      format: 'date-time'"
                "    }"
                "  }"
                "}"
             )
         ) \
        .with_schema(  # declare the schema of the table
             Schema()
             .field("rowtime", DataTypes.TIMESTAMP())
             .rowtime(
                Rowtime()
                .timestamps_from_field("time")
                .watermarks_periodic_bounded(60000))
             .field("a", DataTypes.STRING())
             .field("b", DataTypes.STRING())
             .field("c", DataTypes.STRING())
         ) \
        .in_append_mode() \
        .register_table_source("source")

    st_env.register_table_sink("result",
                               CsvTableSink(["a", "b"],
                                            [DataTypes.STRING(),
                                             DataTypes.STRING()],
                                            result_file))

    st_env.scan("source").window(Tumble.over("1.hours").on("rowtime").alias("w")) \
        .group_by("w, a") \
        .select("a, max(b)").insert_into("result")

    st_env.execute("tumble time window streaming")
コード例 #14
0
ファイル: test_descriptor.py プロジェクト: zyset/flink-1
    def test_timestamps_from_source(self):
        rowtime = Rowtime().timestamps_from_source()

        properties = rowtime.to_properties()
        expected = {'rowtime.timestamps.type': 'from-source'}
        self.assertEqual(expected, properties)
コード例 #15
0
def pv_uv_demo():
    s_env = StreamExecutionEnvironment.get_execution_environment()
    s_env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
    s_env.set_parallelism(1)
    # use blink table planner
    st_env = StreamTableEnvironment.create(
        s_env,
        environment_settings=EnvironmentSettings.new_instance(
        ).in_streaming_mode().use_blink_planner().build())
    # use flink table planner
    # st_env = StreamTableEnvironment.create(s_env)
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("user_behavior")
            .start_from_earliest()
            .property("zookeeper.connect", "localhost:2181")
            .property("bootstrap.servers", "localhost:9092")
        ) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .json_schema(
                "{"
                "  type: 'object',"
                "  properties: {"
                "    user_id: {"
                "      type: 'string'"
                "    },"
                "    item_id: {"
                "      type: 'string'"
                "    },"
                "    category_id: {"
                "      type: 'string'"
                "    },"
                "    behavior: {"
                "      type: 'string'"
                "    },"
                "    ts: {"
                "      type: 'string',"
                "      format: 'date-time'"
                "    }"
                "  }"
                "}"
            )
        ) \
        .with_schema(  # declare the schema of the table
            Schema()
            .field("user_id", DataTypes.STRING())
            .field("item_id", DataTypes.STRING())
            .field("category_id", DataTypes.STRING())
            .field("behavior", DataTypes.STRING())
            .field("rowtime", DataTypes.TIMESTAMP())
            .rowtime(
                Rowtime()
                .timestamps_from_field("ts")
                .watermarks_periodic_bounded(60000))
         ) \
        .in_append_mode() \
        .register_table_source("source")

    # use custom retract sink connector
    custom_connector = CustomConnectorDescriptor('jdbc', 1, False) \
        .property("connector.driver", "org.apache.derby.jdbc.ClientDriver") \
        .property("connector.url", "jdbc:derby://localhost:1527/firstdb") \
        .property("connector.table", "pv_uv_table") \
        .property("connector.write.flush.max-rows", "1")
    st_env.connect(custom_connector) \
        .with_schema(
        Schema()
            .field("startTime", DataTypes.TIMESTAMP())
            .field("endTime", DataTypes.TIMESTAMP())
            .field("pv", DataTypes.BIGINT())
            .field("uv", DataTypes.BIGINT())
    ).register_table_sink("sink")

    st_env.scan("source").window(Tumble.over("1.hours").on("rowtime").alias("w")) \
        .group_by("w") \
        .select("w.start as startTime, w.end as endTime, COUNT(1) as pv, user_id.count.distinct as uv").insert_into("sink")

    st_env.execute("table pv uv")
コード例 #16
0
ファイル: test_descriptor.py プロジェクト: zyset/flink-1
    def test_watermarks_periodic_ascending(self):
        rowtime = Rowtime().watermarks_periodic_ascending()

        properties = rowtime.to_properties()
        expected = {'rowtime.watermarks.type': 'periodic-ascending'}
        self.assertEqual(expected, properties)
コード例 #17
0
ファイル: myflink.py プロジェクト: zhaomuchuan/myflink
            "      type: 'string'"
            "    },"
            "    time: {"
            "      type: 'string',"
            "      format: 'date-time'"
            "    }"
            "  }"
            "}"
        )
    ) \
        .with_schema(  # declare the schema of the table
        Schema()
            .field("rowtime", DataTypes.TIMESTAMP())
            .rowtime(
            Rowtime()
                .timestamps_from_field("time")
                .watermarks_periodic_bounded(60000))
            .field("a", DataTypes.STRING())
            .field("b", DataTypes.DECIMAL(38,12,nullable=True))
            .field("c", DataTypes.STRING())
    ) \
        .in_append_mode() \
        .register_table_source("source")

    # way sink csv
    # st_env.register_table_sink("result_tab",
    #                            CsvTableSink(["a", "count"],
    #                                         [DataTypes.STRING(),
    #                                          DataTypes.DECIMAL(38, 12, nullable=True)],
    #                                         result_file))