def test_from_configuration(self):
    """EnvironmentSettings built from a batch-mode config must not be streaming."""
    batch_config = Configuration()
    batch_config.set_string("execution.runtime-mode", "batch")
    settings = EnvironmentSettings.from_configuration(batch_config)
    self.assertFalse(settings.is_streaming_mode(), "Use batch mode.")
def startup_loopback_server():
    """Start a loopback Beam worker pool and record its address in the config."""
    from pyflink.common import Configuration
    from pyflink.fn_execution.beam.beam_worker_pool_service import \
        BeamFnLoopbackWorkerPoolServicer
    conf = Configuration(j_configuration=j_configuration)
    # The servicer's start() returns the address the runtime should connect to.
    server_address = BeamFnLoopbackWorkerPoolServicer().start()
    conf.set_string("python.loopback-server.address", server_address)
def startup_loopback_server():
    """Start a loopback Beam worker pool and publish its address in the config."""
    from pyflink.common import Configuration
    from pyflink.fn_execution.beam.beam_worker_pool_service import \
        BeamFnLoopbackWorkerPoolServicer
    conf = Configuration(j_configuration=j_configuration)
    # Start the in-process worker pool and store where it can be reached.
    worker_pool_address = BeamFnLoopbackWorkerPoolServicer().start()
    conf.set_string("PYFLINK_LOOPBACK_SERVER_ADDRESS", worker_pool_address)
def test_add_configuration(self):
    """add_configuration should merge the given entries into the table config."""
    extra_config = Configuration()
    extra_config.set_string("k1", "v1")
    table_config = TableConfig.get_default()
    table_config.add_configuration(extra_config)
    self.assertEqual(table_config.get("k1", ""), "v1")
def test_key_set(self):
    """key_set should report every configured key as a set."""
    config = Configuration()
    for index in (1, 2, 3):
        config.set_string("k%d" % index, "v%d" % index)
    self.assertEqual(config.key_set(), {"k1", "k2", "k3"})
def test_contains_key(self):
    """contains_key is True only for keys that were actually set."""
    config = Configuration()
    config.set_string("k1", "v1")
    self.assertTrue(config.contains_key("k1"))
    self.assertFalse(config.contains_key("k2"))
def test_init(self):
    """A fresh Configuration is empty; the copy constructor clones entries."""
    original = Configuration()
    self.assertEqual(original.to_dict(), {})
    original.set_string("k1", "v1")
    copied = Configuration(original)
    self.assertEqual(copied.to_dict(), {"k1": "v1"})
def test_get_execution_environment_with_config(self):
    """A config passed to get_execution_environment must take effect in the env."""
    user_config = Configuration()
    user_config.set_integer('parallelism.default', 12)
    user_config.set_string('pipeline.name', 'haha')
    env = StreamExecutionEnvironment.get_execution_environment(user_config)
    # Parallelism is reflected in the execution config ...
    self.assertEqual(env.get_config().get_parallelism(), 12)
    # ... and the pipeline name is visible in the underlying Java configuration.
    effective_config = Configuration(
        j_configuration=get_j_env_configuration(env._j_stream_execution_environment))
    self.assertEqual(effective_config.get_string('pipeline.name', ''), 'haha')
def test_deepcopy(self):
    """deepcopy yields an equal but independent Configuration."""
    source = Configuration()
    source.set_string("k1", "v1")
    clone = deepcopy(source)
    self.assertEqual(clone, source)
    # Mutating the clone must not affect the source.
    clone.set_string("k1", "v2")
    self.assertNotEqual(clone, source)
def setUp(self) -> None:
    """Create a parallelism-2 stream/table environment with small execution bundles."""
    from pyflink.datastream import StreamExecutionEnvironment
    super(DataStreamConversionTestCases, self).setUp()
    env_config = Configuration()
    env_config.set_string("akka.ask.timeout", "20 s")
    self.env = StreamExecutionEnvironment.get_execution_environment(env_config)
    self.t_env = StreamTableEnvironment.create(self.env)
    self.env.set_parallelism(2)
    # Small bundles so results surface quickly in tests.
    self.t_env.get_config().set("python.fn-execution.bundle.size", "1")
    self.test_sink = DataStreamTestSinkFunction()
def test_add_all(self):
    """add_all copies entries across, optionally applying a key prefix."""
    source = Configuration()
    source.set_string("k1", "v1")
    target = Configuration()
    # Without a prefix the key is copied verbatim.
    target.add_all(source)
    self.assertEqual(target.get_string("k1", ""), "v1")
    # With a prefix every copied key gains that prefix.
    target.add_all(source, "conf_")
    self.assertEqual(target.get_string("conf_k1", ""), "v1")
def test_configure(self):
    """env.configure must apply pipeline, execution and state-backend options."""
    options = Configuration()
    for key, value in (
            ('pipeline.operator-chaining', 'false'),
            ('pipeline.time-characteristic', 'IngestionTime'),
            ('execution.buffer-timeout', '1 min'),
            ('execution.checkpointing.timeout', '12000'),
            ('state.backend', 'jobmanager')):
        options.set_string(key, value)
    self.env.configure(options)
    self.assertEqual(self.env.is_chaining_enabled(), False)
    self.assertEqual(self.env.get_stream_time_characteristic(),
                     TimeCharacteristic.IngestionTime)
    # '1 min' is normalized to milliseconds.
    self.assertEqual(self.env.get_buffer_timeout(), 60000)
    self.assertEqual(self.env.get_checkpoint_config().get_checkpoint_timeout(), 12000)
    self.assertTrue(isinstance(self.env.get_state_backend(), MemoryStateBackend))
def test_to_dict(self):
    """to_dict renders every value as its string representation."""
    config = Configuration()
    config.set_string("k1", "v1")
    config.set_integer("k2", 1)
    config.set_float("k3", 1.2)
    config.set_boolean("k4", True)
    expected = {
        "k1": "v1",
        "k2": "1",
        "k3": "1.2",
        "k4": "true"
    }
    self.assertEqual(config.to_dict(), expected)
def test_remove_config(self):
    """remove_config deletes a key and reports whether anything was removed."""
    config = Configuration()
    config.set_string("k1", "v1")
    config.set_integer("k2", 1)
    self.assertTrue(config.contains_key("k1"))
    self.assertTrue(config.contains_key("k2"))
    # First removal succeeds; removing the same key again is a no-op.
    self.assertTrue(config.remove_config("k1"))
    self.assertFalse(config.remove_config("k1"))
    self.assertFalse(config.contains_key("k1"))
    config.remove_config("k2")
    self.assertFalse(config.contains_key("k2"))
def test_hash_equal_str(self):
    """hash(), ==, and str() must all reflect the configuration contents."""
    first = Configuration()
    second = Configuration()
    first.set_string("k1", "v1")
    first.set_integer("k2", 1)
    second.set_string("k1", "v1")
    # Differing contents: different hash, not equal.
    self.assertNotEqual(hash(first), hash(second))
    self.assertNotEqual(first, second)
    # Identical contents: same hash, equal.
    second.set_integer("k2", 1)
    self.assertEqual(hash(first), hash(second))
    self.assertEqual(first, second)
    self.assertEqual(str(first), "{k1=v1, k2=1}")
def test_add_all_to_dict(self):
    """add_all_to_dict exports entries into a dict, keeping native Python types."""
    config = Configuration()
    config.set_string("k1", "v1")
    config.set_integer("k2", 1)
    config.set_float("k3", 1.2)
    config.set_boolean("k4", True)
    config.set_bytearray("k5", bytearray([1, 2, 3]))
    exported = {}
    config.add_all_to_dict(exported)
    self.assertEqual(exported, {
        "k1": "v1",
        "k2": 1,
        "k3": 1.2,
        "k4": True,
        "k5": bytearray([1, 2, 3])
    })
def test_getters_and_setters(self):
    """Every typed setter must be read back by the matching typed getter."""
    config = Configuration()
    config.set_string("str", "v1")
    config.set_integer("int", 2)
    config.set_boolean("bool", True)
    config.set_float("float", 0.5)
    config.set_bytearray("bytearray", bytearray([1, 2, 3]))
    self.assertEqual(config.get_string("str", ""), "v1")
    self.assertEqual(config.get_integer("int", 0), 2)
    self.assertEqual(config.get_boolean("bool", False), True)
    self.assertEqual(config.get_float("float", 0), 0.5)
    self.assertEqual(config.get_bytearray("bytearray", bytearray()), bytearray([1, 2, 3]))
def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
        -> JavaObject:
    """Build the Java StreamGraph for this execution environment.

    Configures the Python operators on the JVM side, starts an in-process
    loopback Beam worker pool when running against a local deployment (unless
    python archives + parallelism > 1 force separate worker processes), applies
    Python operator chaining, then generates the graph.

    :param clear_transformations: whether the buffered transformations are
                                  cleared once the stream graph is generated.
    :param job_name: optional name to set on the generated stream graph.
    :return: the Java StreamGraph object.
    """
    gateway = get_gateway()
    JPythonConfigUtil = gateway.jvm.org.apache.flink.python.util.PythonConfigUtil
    # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
    j_configuration = get_j_env_configuration(self._j_stream_execution_environment)
    if not self._remote_mode and is_local_deployment(j_configuration):
        from pyflink.common import Configuration
        from pyflink.fn_execution.beam.beam_worker_pool_service import \
            BeamFnLoopbackWorkerPoolServicer
        jvm = gateway.jvm
        env_config = JPythonConfigUtil.getEnvironmentConfig(
            self._j_stream_execution_environment)
        parallelism = self.get_parallelism()
        if parallelism > 1 and env_config.containsKey(jvm.PythonOptions.PYTHON_ARCHIVES.key()):
            import logging
            # BUGFIX: message said "Lookback mode"; the feature is loopback mode.
            logging.warning("Loopback mode is disabled as python archives are used and the "
                            "parallelism of the job is greater than 1. The Python user-defined "
                            "functions will be executed in an independent Python process.")
        else:
            config = Configuration(j_configuration=j_configuration)
            # NOTE(review): other call sites in this codebase use
            # "python.loopback-server.address" / "PYFLINK_LOOPBACK_SERVER_ADDRESS"
            # for this purpose — confirm this key matches what the worker reads.
            config.set_string(
                "loopback.server.address", BeamFnLoopbackWorkerPoolServicer().start())
    JPythonConfigUtil.configPythonOperator(self._j_stream_execution_environment)
    gateway.jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer.apply(
        self._j_stream_execution_environment)
    JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(
        get_field_value(self._j_stream_execution_environment, "transformations"))
    j_stream_graph = self._j_stream_execution_environment.getStreamGraph(clear_transformations)
    if job_name is not None:
        j_stream_graph.setJobName(job_name)
    return j_stream_graph