def setUp(self):
    """Prepare a fresh parallelism-2 environment and a collecting test sink."""
    environment = StreamExecutionEnvironment.get_execution_environment()
    environment.set_parallelism(2)
    self.env = environment
    self.test_sink = DataStreamTestSinkFunction()
def setUp(self) -> None:
    """Create the stream environment and test sink, and stage connector jars.

    The filesystem-connector jars are loaded up front because the cases in
    this class exercise the file connector.
    """
    self.env = StreamExecutionEnvironment.get_execution_environment()
    self.test_sink = DataStreamTestSinkFunction()
    _load_specific_flink_module_jars('/flink-connectors/flink-connector-files')
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import *

from ml_udf import label_encode

# Build a blink-planner streaming TableEnvironment on top of the
# DataStream execution environment.
settings = EnvironmentSettings.new_instance().use_blink_planner().build()
exec_env = StreamExecutionEnvironment.get_execution_environment()
t_env = StreamTableEnvironment.create(exec_env, environment_settings=settings)

# Register the Python UDF so SQL statements can call label_encode(...).
t_env.create_temporary_function("label_encode", label_encode)

# DDL for the CSV-backed source table of user attributes.
# NOTE(review): the original literal was unterminated (truncated source);
# the closing triple-quote has been restored here — confirm against the
# upstream file.
CREATE_USER_TABLE_DDL = """
CREATE TABLE users (
    user_id STRING,
    source STRING,
    sex_name STRING,
    age_name STRING,
    city_name STRING,
    pic_vip_type STRING,
    lt30 STRING,
    last_pic_app_active_device_type STRING,
    last_pic_app_active_device_model STRING,
    country_name STRING,
    province_name STRING,
    is_encodephone STRING,
    is_wechat STRING
) WITH (
    'connector' = 'filesystem',
    'format' = 'csv',
    'path' = 'users.csv'
)
"""
def create_new_env():
    """Return a new StreamExecutionEnvironment configured with parallelism 2."""
    new_env = StreamExecutionEnvironment.get_execution_environment()
    new_env.set_parallelism(2)
    return new_env
def setUp(self) -> None:
    """Create the environment, remember the JVM context class loader, and
    stage the JDBC connector jars used by these tests."""
    self.env = StreamExecutionEnvironment.get_execution_environment()
    # Save the current thread's context class loader before jars are loaded
    # (presumably so it can be restored later, e.g. in tearDown — not shown
    # in this view).
    jvm_thread = get_gateway().jvm.Thread.currentThread()
    self._cxt_clz_loader = jvm_thread.getContextClassLoader()
    _load_specific_flink_module_jars('/flink-connectors/flink-connector-jdbc')
def setUp(self) -> None:
    """Create the execution environment and a sink that collects results."""
    environment = StreamExecutionEnvironment.get_execution_environment()
    sink = DataStreamTestSinkFunction()
    self.env = environment
    self.test_sink = sink
def setUp(self):
    """Extend the base setup with a parallelism-2 stream TableEnvironment."""
    super(PyFlinkStreamTableTestCase, self).setUp()
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_parallelism(2)
    self.env = env
    self.t_env = StreamTableEnvironment.create(self.env)
def session_time_window_streaming():
    """Run a streaming job with a 10-minute session window over event time.

    Reads JSON records (fields a, b, c, time) from the Kafka 0.11 topic
    ``user``, derives a ``rowtime`` attribute from the ``time`` field with
    60 s bounded watermarks, groups by ``a`` within session windows, and
    writes ``a`` and ``max(b)`` to a local CSV file.
    """
    s_env = StreamExecutionEnvironment.get_execution_environment()
    s_env.set_parallelism(1)
    # Event-time semantics are required for the rowtime attribute and the
    # session window defined below.
    s_env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
    st_env = StreamTableEnvironment.create(s_env)
    result_file = "/tmp/session_time_window_streaming.csv"
    # Remove any output from a previous run so the sink starts clean.
    if os.path.exists(result_file):
        os.remove(result_file)
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("user")
            .start_from_earliest()
            .property("zookeeper.connect", "localhost:2181")
            .property("bootstrap.servers", "localhost:9092")
        ) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .json_schema(
                "{"
                " type: 'object',"
                " properties: {"
                " a: {"
                " type: 'string'"
                " },"
                " b: {"
                " type: 'string'"
                " },"
                " c: {"
                " type: 'string'"
                " },"
                " time: {"
                " type: 'string',"
                " format: 'date-time'"
                " }"
                " }"
                "}"
            )
        ) \
        .with_schema(  # declare the schema of the table
            Schema()
            .field("rowtime", DataTypes.TIMESTAMP())
            # Derive rowtime from the JSON 'time' field; allow events up to
            # 60000 ms late via bounded periodic watermarks.
            .rowtime(
                Rowtime()
                .timestamps_from_field("time")
                .watermarks_periodic_bounded(60000))
            .field("a", DataTypes.STRING())
            .field("b", DataTypes.STRING())
            .field("c", DataTypes.STRING())
        ) \
        .in_append_mode() \
        .register_table_source("source")
    # CSV sink receiving the (a, max(b)) result rows.
    st_env.register_table_sink(
        "result",
        CsvTableSink(
            ["a", "b"],
            [DataTypes.STRING(), DataTypes.STRING()],
            result_file))
    # Sessions close after a 10-minute gap in events (per key 'a').
    st_env.scan("source").window(Session.with_gap("10.minutes").on("rowtime").alias("w")) \
        .group_by("w, a") \
        .select("a, max(b)").insert_into("result")
    st_env.execute("session time window streaming")
def setUp(self):
    """Obtain the stream execution environment used by the test methods."""
    environment = StreamExecutionEnvironment.get_execution_environment()
    self.env = environment
def setUp(self) -> None:
    """Create the environment, raise the Akka ask timeout, and install the
    collecting test sink."""
    self.env = StreamExecutionEnvironment.get_execution_environment()
    # Fetch the underlying Java Configuration object via reflection on the
    # wrapped Java environment, then raise the RPC ask timeout to 20 s
    # (presumably to tolerate slow test environments — not shown here).
    configuration = invoke_java_object_method(
        self.env._j_stream_execution_environment, "getConfiguration")
    configuration.setString("akka.ask.timeout", "20 s")
    self.test_sink = DataStreamTestSinkFunction()