def max_travellers_per_destination():
    """Print per-destination-zone counts over one-hour tumbling windows.

    Executes the DDL returned by ``create_table_ddl`` with a watermark that
    trails ``dropOffTime`` by 30 seconds, reads the ``TaxiRide`` table, groups
    it by ``destLocationZone`` and a one-hour tumbling window on
    ``dropOffTime``, and prints the zone, window start, window end and count
    as an append stream. Finally submits the job under the name
    ``'Max-Travellers-Per-Destination'``.

    NOTE(review): ``passengerCount.count`` counts rows rather than summing
    passengers - confirm this is the intended "travellers" metric.
    """
    env = StreamExecutionEnvironment.get_execution_environment()
    t_env = StreamTableEnvironment.create(stream_execution_environment=env)

    watermark_clause = "WATERMARK FOR dropOffTime AS dropOffTime - INTERVAL '30' SECONDS"
    t_env.execute_sql(create_table_ddl(watermark_clause))

    rides = t_env.from_path('TaxiRide')
    projected = rides.select(
        rides.passengerCount, rides.dropOffTime, rides.destLocationZone)

    # One-hour tumbling window on the drop-off time, referred to as 'w' below.
    hourly_window = Tumble().over('1.hour').on(rides.dropOffTime).alias('w')
    grouped = projected \
        .window(hourly_window) \
        .group_by(rides.destLocationZone, col('w'))

    no_of_travelers_per_dest = grouped.select(
        rides.destLocationZone,
        col('w').start.alias('start'),
        col('w').end.alias('end'),
        rides.passengerCount.count.alias('cnt'),
    )

    # Row layout of the printed append stream, matching the select above.
    field_names = ['destLocationZone', 'start', 'end', 'cnt']
    field_types = [
        Types.STRING(),
        Types.SQL_TIMESTAMP(),
        Types.SQL_TIMESTAMP(),
        Types.LONG(),
    ]
    t_env.to_append_stream(
        no_of_travelers_per_dest,
        Types.ROW_NAMED(field_names, field_types),
    ).print()

    env.execute('Max-Travellers-Per-Destination')
def popular_taxi_vendor():
    """Print per-vendor ride counts over sliding windows.

    Runs with parallelism 1. Executes the DDL returned by
    ``create_table_ddl`` with a watermark that trails ``pickupTime`` by
    30 seconds, reads the ``TaxiRide`` table, groups it by ``vendorId`` and a
    15-minute window sliding every 5 minutes on ``pickupTime``, and prints
    the vendor, window start, window end and count as an append stream.
    Finally submits the job under the name ``'Popular-Taxi-Vendor'``.
    """
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_parallelism(1)
    t_env = StreamTableEnvironment.create(stream_execution_environment=env)

    watermark_clause = "WATERMARK FOR pickupTime AS pickupTime - INTERVAL '30' SECONDS"
    t_env.execute_sql(create_table_ddl(watermark_clause))

    rides = t_env.from_path('TaxiRide')
    projected = rides.select(rides.vendorId, rides.pickupTime)

    # 15-minute window advancing every 5 minutes, referred to as 'w' below.
    sliding_window = Slide \
        .over('15.minutes') \
        .every('5.minutes') \
        .on(rides.pickupTime) \
        .alias('w')
    grouped = projected \
        .window(sliding_window) \
        .group_by(rides.vendorId, col('w'))

    popular_rides = grouped.select(
        rides.vendorId,
        col('w').start.alias('start'),
        col('w').end.alias('end'),
        rides.vendorId.count.alias('cnt'),
    )

    # Row layout of the printed append stream, matching the select above.
    field_names = ['vendorId', 'start', 'end', 'cnt']
    field_types = [
        Types.INT(),
        Types.SQL_TIMESTAMP(),
        Types.SQL_TIMESTAMP(),
        Types.LONG(),
    ]
    t_env.to_append_stream(
        popular_rides,
        Types.ROW_NAMED(field_names, field_types),
    ).print()

    env.execute('Popular-Taxi-Vendor')
 def setUp(self):
     """Create the stream and table environments used by each test.

     Sets ``self.env`` to the execution environment with parallelism 2 and
     ``self.t_env`` to a StreamTableEnvironment built on it with streaming
     mode and the Blink planner selected.
     """
     super(PyFlinkBlinkStreamTableTestCase, self).setUp()

     self.env = StreamExecutionEnvironment.get_execution_environment()
     self.env.set_parallelism(2)

     settings_builder = EnvironmentSettings.new_instance()
     blink_streaming_settings = \
         settings_builder.in_streaming_mode().use_blink_planner().build()
     self.t_env = StreamTableEnvironment.create(
         self.env, environment_settings=blink_streaming_settings)
Beispiel #4
0
 def setUp(self):
     """Create the test environments and set the task off-heap memory size.

     Sets ``self.env`` to the execution environment with parallelism 2 and
     ``self.t_env`` to a StreamTableEnvironment built on it with streaming
     mode and the Blink planner selected, then sets
     ``taskmanager.memory.task.off-heap.size`` to ``80mb`` on the table
     environment's configuration.
     """
     super(PyFlinkBlinkStreamTableTestCase, self).setUp()

     self.env = StreamExecutionEnvironment.get_execution_environment()
     self.env.set_parallelism(2)

     settings_builder = EnvironmentSettings.new_instance()
     blink_streaming_settings = \
         settings_builder.in_streaming_mode().use_blink_planner().build()
     self.t_env = StreamTableEnvironment.create(
         self.env, environment_settings=blink_streaming_settings)

     configuration = self.t_env.get_config().get_configuration()
     configuration.set_string("taskmanager.memory.task.off-heap.size", "80mb")
Beispiel #5
0
 def setUp(self):
     """Create the test environments and set the Python bundle size to 1.

     Sets ``self.env`` to the execution environment with parallelism 2 and
     ``self.t_env`` to a StreamTableEnvironment built on it with streaming
     mode and the Blink planner selected, then sets
     ``python.fn-execution.bundle.size`` to ``1`` on the table environment's
     configuration.
     """
     super(PyFlinkBlinkStreamTableTestCase, self).setUp()

     self.env = StreamExecutionEnvironment.get_execution_environment()
     self.env.set_parallelism(2)

     settings_builder = EnvironmentSettings.new_instance()
     blink_streaming_settings = \
         settings_builder.in_streaming_mode().use_blink_planner().build()
     self.t_env = StreamTableEnvironment.create(
         self.env, environment_settings=blink_streaming_settings)

     configuration = self.t_env.get_config().get_configuration()
     configuration.set_string("python.fn-execution.bundle.size", "1")
Beispiel #6
0
    def get_stream_table_environment(self) -> StreamTableEnvironment:
        """
        Get the StreamTableEnvironment. If none has been set yet, one is created
        from the StreamExecutionEnvironment returned by
        ``StreamExecutionEnvironment.get_execution_environment()`` and kept for
        later calls.

        :return: the StreamTableEnvironment.

        .. versionadded:: 1.11.0
        """
        # Already set or created by an earlier call: hand it back unchanged.
        if self._stream_tab_env is not None:
            return self._stream_tab_env

        execution_env = StreamExecutionEnvironment.get_execution_environment()
        self._stream_tab_env = StreamTableEnvironment.create(execution_env)
        return self._stream_tab_env
Beispiel #7
0
    def test_create_table_environment(self):
        """Check that TableConfig values are readable from the created environment.

        Builds a TableConfig with a custom max generated code length, null
        check flag and timezone, passes it to ``StreamTableEnvironment.create``
        and asserts that ``get_config()`` reports the same three values.
        """
        expected_code_length = 32000
        expected_timezone = "Asia/Shanghai"

        table_config = TableConfig()
        table_config.set_max_generated_code_length(expected_code_length)
        table_config.set_null_check(False)
        table_config.set_timezone(expected_timezone)

        t_env = StreamTableEnvironment.create(
            StreamExecutionEnvironment.get_execution_environment(), table_config)
        actual_config = t_env.get_config()

        self.assertFalse(actual_config.get_null_check())
        self.assertEqual(
            actual_config.get_max_generated_code_length(), expected_code_length)
        self.assertEqual(actual_config.get_timezone(), expected_timezone)
Beispiel #8
0
    def get_default() -> Optional[MLEnvironment]:
        """
        Get the MLEnvironment stored under the default MLEnvironmentId.

        If the default entry of the factory's map is None, an MLEnvironment is
        first built from the environments of the Java-side default
        MLEnvironment and stored in that entry. The lookup and the creation
        both happen while holding ``MLEnvironmentFactory._lock``.

        :return: the default MLEnvironment.

        .. versionadded:: 1.11.0
        """
        with MLEnvironmentFactory._lock:
            default_id = MLEnvironmentFactory._default_ml_environment_id
            registry = MLEnvironmentFactory._map

            if registry[default_id] is None:
                # Wrap each environment of the Java default MLEnvironment in
                # its Python counterpart.
                j_factory = get_gateway().jvm.org.apache.flink.ml.common.MLEnvironmentFactory
                j_ml_env = j_factory.getDefault()
                registry[default_id] = MLEnvironment(
                    ExecutionEnvironment(j_ml_env.getExecutionEnvironment()),
                    StreamExecutionEnvironment(j_ml_env.getStreamExecutionEnvironment()),
                    BatchTableEnvironment(j_ml_env.getBatchTableEnvironment()),
                    StreamTableEnvironment(j_ml_env.getStreamTableEnvironment()),
                )

            return registry[default_id]
Beispiel #9
0
def popular_destination_query():
    """Print destinations whose windowed ride count exceeds a threshold.

    Executes the DDL returned by ``create_table_ddl`` with a watermark that
    trails ``pickupTime`` by 30 seconds, then runs a SQL query over
    ``TaxiRide`` that counts rides per ``destLocationId`` in 15-minute hopping
    windows advancing every 5 minutes and keeps rows whose count is greater
    than ``args.threshold``. The result is printed as an append stream and the
    job is submitted under the name ``'Popular-Destination'``.

    NOTE(review): ``args`` is not defined in this function; presumably it is
    a module-level object holding parsed command-line arguments - confirm.
    """
    env = StreamExecutionEnvironment.get_execution_environment()
    t_env = StreamTableEnvironment.create(stream_execution_environment=env)

    watermark_clause = "WATERMARK FOR pickupTime AS pickupTime - INTERVAL '30' SECONDS"
    t_env.execute_sql(create_table_ddl(watermark_clause))

    # The query text is kept verbatim; the threshold is interpolated directly.
    query = f"""SELECT 
    destLocationId, wstart, wend, cnt 
FROM 
    (SELECT 
        destLocationId, 
        HOP_START(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wstart, 
        HOP_END(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wend, 
        COUNT(destLocationId) AS cnt 
    FROM
        (SELECT 
            pickupTime, 
            destLocationId 
        FROM TaxiRide) 
    GROUP BY
        destLocationId, HOP(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE)
    )
WHERE cnt > {args.threshold}
"""

    hot_destinations = t_env.sql_query(query)

    # Row layout of the printed append stream, matching the outer SELECT.
    field_names = ['destLocationId', 'wstart', 'wend', 'cnt']
    field_types = [
        Types.INT(),
        Types.SQL_TIMESTAMP(),
        Types.SQL_TIMESTAMP(),
        Types.LONG(),
    ]
    t_env.to_append_stream(
        hot_destinations,
        Types.ROW_NAMED(field_names, field_types),
    ).print()

    env.execute('Popular-Destination')