# Shared imports for the examples below (PyFlink 1.11/1.12-era APIs).
from pyflink.datastream import StreamExecutionEnvironment, TimeCharacteristic
from pyflink.table import StreamTableEnvironment, EnvironmentSettings

Example #1

def log_processing():
    env = StreamExecutionEnvironment.get_execution_environment()
    env_settings = EnvironmentSettings.Builder().use_blink_planner().build()
    t_env = StreamTableEnvironment.create(stream_execution_environment=env,
                                          environment_settings=env_settings)
    t_env.get_config().get_configuration().set_boolean(
        "python.fn-execution.memory.managed", True)

    source_ddl = """
            CREATE TABLE payment_msg(
                createTime VARCHAR,
                orderId BIGINT,
                payAmount DOUBLE,
                payPlatform INT,
                provinceId INT
            ) WITH (
              'connector.type' = 'kafka',
              'connector.version' = 'universal',
              'connector.topic' = 'payment_msg',
              'connector.properties.bootstrap.servers' = 'kafka:9092',
              'connector.properties.group.id' = 'test_3',
              'connector.startup-mode' = 'latest-offset',
              'format.type' = 'json'
            )
            """

    es_sink_ddl = """
            CREATE TABLE es_sink (
            province VARCHAR PRIMARY KEY,
            pay_amount DOUBLE
            ) WITH (
                'connector.type' = 'elasticsearch',
                'connector.version' = '7',
                'connector.hosts' = 'http://elasticsearch:9200',
                'connector.index' = 'platform_pay_amount_1',
                'connector.document-type' = 'payment',
                'update-mode' = 'upsert',
                'connector.flush-on-checkpoint' = 'true',
                'connector.key-delimiter' = '$',
                'connector.key-null-literal' = 'n/a',
                'connector.bulk-flush.max-size' = '42mb',
                'connector.bulk-flush.max-actions' = '32',
                'connector.bulk-flush.interval' = '1000',
                'connector.bulk-flush.backoff.delay' = '1000',
                'format.type' = 'json'
            )
    """

    t_env.sql_update(source_ddl)
    t_env.sql_update(es_sink_ddl)
    t_env.register_function('province_id_to_name', province_id_to_name)

    t_env.from_path("payment_msg") \
        .select("province_id_to_name(provinceId) as province, payAmount") \
        .group_by("province") \
        .select("province, sum(payAmount) as pay_amount") \
        .insert_into("es_sink")

    t_env.execute("payment_demo")
Example #2

def flink_processing():
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_parallelism(1)
    env_settings = EnvironmentSettings.Builder().use_blink_planner().build()
    t_env = StreamTableEnvironment.create(stream_execution_environment=env,
                                          environment_settings=env_settings)

    # Make the Kafka connector classes available to the job. Note that
    # flink-sql-connector-kafka is a fat jar that already bundles
    # flink-connector-kafka, so shipping both can lead to class conflicts;
    # the fat jar alone is usually enough.
    t_env.get_config().get_configuration().set_string(
        "pipeline.jars",
        "file:///D:/temp/kafka_2.12-2.7.0/flink-connector-kafka_2.11-1.12.0.jar;"
        "file:///D:/temp/kafka_2.12-2.7.0/flink-sql-connector-kafka_2.11-1.12.0.jar"
    )
    # Dummy pipeline: read data from one Kafka topic into a table, then write it to another.
    source_ddl = """
                    CREATE TABLE source_num(
                      `ts` TIMESTAMP(3) METADATA FROM 'timestamp',
                      `step` FLOAT,
                      `edge_id` STRING,
                      `vehicle_num` INT
                    ) WITH (
                      'connector' = 'kafka',
                      'topic' = 'source_num',
                      'properties.bootstrap.servers' = 'localhost:9092',
                      'properties.group.id' = 'new_group2',
                      'format' = 'json'
                    )
                    """

    sink_ddl = """
                    CREATE TABLE sink_table_num(
                        `ts` TIMESTAMP(3) METADATA FROM 'timestamp',
                        `step` FLOAT,
                        `edge_id` STRING,
                        `vehicle_num` INT
                    ) WITH (
                      'connector' = 'kafka',
                      'topic' = 'sink_topic_num',
                      'properties.bootstrap.servers' = 'localhost:9092',
                      'format' = 'json'
                    )
                    """
    # Execute the queries
    t_env.execute_sql(source_ddl)
    t_env.execute_sql(sink_ddl)
    # The stream processing query executed in Flink: select the rows for a
    # single edge from the source table and insert them into the sink table.
    t_env.sql_query(
        "SELECT `ts`, `step`, `edge_id`, `vehicle_num` "
        "FROM `source_num` "
        "WHERE `edge_id`='313576543#2'"  # Dachauerstrasse - Lothstrasse
    ).execute_insert("sink_table_num").wait()
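
For a quick local test of this pipeline, matching JSON records can be published to the source topic with, for example, the kafka-python package (a sketch; the payload values are made up):

import json
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))

# `ts` is read from the Kafka record metadata, so only the payload fields are sent.
producer.send('source_num',
              {'step': 0.5, 'edge_id': '313576543#2', 'vehicle_num': 3})
producer.flush()
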
Example #3

def log_processing():
    env = StreamExecutionEnvironment.get_execution_environment()
    env_settings = EnvironmentSettings.Builder().use_blink_planner().build()
    t_env = StreamTableEnvironment.create(stream_execution_environment=env,
                                          environment_settings=env_settings)
    # specify connector and format jars
    t_env.get_config().get_configuration().set_string("pipeline.jars",
                                                      "file://" + FAT_JAR_PATH)

    source_ddl = """
            CREATE TABLE source_table(
                a VARCHAR,
                b INT
            ) WITH (
              'connector' = 'kafka',
              'topic' = 'source_topic',
              'properties.bootstrap.servers' = 'localhost:9092',
              'properties.group.id' = 'test_group',
              'scan.startup.mode' = 'earliest-offset',
              'format' = 'json'
            )
            """

    sink_ddl = """
            CREATE TABLE sink_table(
                a VARCHAR
            ) WITH (
              'connector' = 'kafka',
              'topic' = 'sink_topic',
              'properties.bootstrap.servers' = 'localhost:9092',
              'format' = 'json'
            )
            """

    t_env.execute_sql(source_ddl)
    t_env.execute_sql(sink_ddl)

    t_env.from_path("source_table").select("a").execute_insert(
        "sink_table").wait()
Example #4

def log_processing():
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
    env_settings = EnvironmentSettings.Builder().use_blink_planner().build()
    t_env = StreamTableEnvironment.create(stream_execution_environment=env,
                                          environment_settings=env_settings)
    t_env.get_config().get_configuration().set_boolean(
        "python.fn-execution.memory.managed", True)

    source_ddl = """
            CREATE TABLE payment_msg(
                createTime VARCHAR,
                rt as TO_TIMESTAMP(createTime),
                orderId BIGINT,
                payAmount DOUBLE,
                payPlatform INT,
                provinceId INT,
                WATERMARK FOR rt as rt - INTERVAL '2' SECOND
            ) WITH (
              'connector' = 'kafka-0.11',
              'topic' = 'payment_msg',
              'properties.bootstrap.servers' = 'kafka:9092',
              'scan.startup.mode' = 'latest-offset',
              'format' = 'json'
            )
            """

    es_sink_ddl = """
            CREATE TABLE es_sink (
            province VARCHAR,
            pay_amount DOUBLE,
            rowtime TIMESTAMP(3)
            ) WITH (
                'connector.type' = 'elasticsearch',
                'connector.version' = '7',
                'connector.hosts' = 'http://elasticsearch:9200',
                'connector.index' = 'platform_pay_amount_1',
                'connector.document-type' = 'payment',
                'update-mode' = 'append',
                'connector.flush-on-checkpoint' = 'true',
                'connector.key-delimiter' = '$',
                'connector.key-null-literal' = 'n/a',
                'connector.bulk-flush.max-size' = '42mb',
                'connector.bulk-flush.max-actions' = '32',
                'connector.bulk-flush.interval' = '1000',
                'connector.bulk-flush.backoff.delay' = '1000',
                'format.type' = 'json'
            )
    """

    t_env.sql_update(source_ddl)
    t_env.sql_update(es_sink_ddl)

    t_env.register_function('province_id_to_name', province_id_to_name)

    query = """
    select province_id_to_name(provinceId) as province, sum(payAmount) as pay_amount, tumble_start(rt, interval '5' seconds) as rowtime
    from payment_msg
    group by tumble(rt, interval '5' seconds), provinceId
    """

    t_env.sql_query(query).insert_into("es_sink")

    t_env.execute("payment_demo")