def _open(self):
    """Start the BeamFnLoopbackWorkerPoolServicer when this job runs in a MiniCluster.

    The worker execution mode is taken from the ``_python_worker_execution_mode``
    environment variable; supported values are ``'loopback'`` and ``'process'``.

    :raises ValueError: if loopback mode is requested for a non-local deployment,
        or if an unknown execution mode is configured.
    """
    # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
    j_configuration = get_j_env_configuration(
        self._j_stream_execution_environment)

    def _launch_loopback_server():
        # Lazy imports: only pull in the Beam worker-pool machinery when
        # loopback mode is actually used.
        from pyflink.common import Configuration
        from pyflink.fn_execution.beam.beam_worker_pool_service import \
            BeamFnLoopbackWorkerPoolServicer

        server_address = BeamFnLoopbackWorkerPoolServicer().start()
        conf = Configuration(j_configuration=j_configuration)
        conf.set_string("python.loopback-server.address", server_address)

    mode = os.environ.get('_python_worker_execution_mode')

    if mode is None:
        # Default: use loopback only for local deployments.
        if is_local_deployment(j_configuration):
            _launch_loopback_server()
    elif mode == 'loopback':
        if not is_local_deployment(j_configuration):
            raise ValueError(
                "Loopback mode is enabled, however the job wasn't configured to "
                "run in local deployment mode")
        _launch_loopback_server()
    elif mode != 'process':
        raise ValueError(
            "It only supports to execute the Python worker in 'loopback' mode and 'process' "
            "mode, unknown mode '%s' is configured" % mode)
def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
        -> JavaObject:
    """Build and return the Java ``StreamGraph`` for this execution environment.

    Also starts the BeamFnLoopbackWorkerPoolServicer when the job executes in a
    MiniCluster, publishing its address via the ``PYFLINK_LOOPBACK_SERVER_ADDRESS``
    environment entry of the JVM.

    :param clear_transformations: whether the registered transformations are
        cleared after the stream graph is generated.
    :param job_name: optional job name to set on the generated stream graph.
    :return: the Java ``StreamGraph`` object.
    :raises ValueError: if loopback mode is requested for a non-local deployment,
        or if an unknown execution mode is configured.
    """
    gateway = get_gateway()
    JPythonConfigUtil = gateway.jvm.org.apache.flink.python.util.PythonConfigUtil
    # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
    j_configuration = get_j_env_configuration(self._j_stream_execution_environment)

    def _launch_loopback_server():
        jvm = gateway.jvm
        env_config = JPythonConfigUtil.getEnvironmentConfig(
            self._j_stream_execution_environment)
        uses_archives = env_config.containsKey(jvm.PythonOptions.PYTHON_ARCHIVES.key())
        if self.get_parallelism() > 1 and uses_archives:
            # Python archives cannot be shared with a loopback worker when the
            # job runs with parallelism > 1, so fall back to process mode.
            import logging
            logging.warning("Loopback mode is disabled as python archives are used and the "
                            "parallelism of the job is greater than 1. The Python user-defined "
                            "functions will be executed in an independent Python process.")
            return
        from pyflink.fn_execution.beam.beam_worker_pool_service import \
            BeamFnLoopbackWorkerPoolServicer
        # Inject the server address into the JVM's (normally immutable)
        # environment map so the Java side can find the loopback server.
        j_env = jvm.System.getenv()
        get_field_value(j_env, "m").put(
            'PYFLINK_LOOPBACK_SERVER_ADDRESS',
            BeamFnLoopbackWorkerPoolServicer().start())

    mode = getattr(self, "_python_worker_execution_mode", None)

    if mode is None:
        if is_local_deployment(j_configuration):
            _launch_loopback_server()
    elif mode == 'loopback':
        if not is_local_deployment(j_configuration):
            raise ValueError("Loopback mode is enabled, however the job wasn't configured to "
                             "run in local deployment mode")
        _launch_loopback_server()
    elif mode != 'process':
        raise ValueError(
            "It only supports to execute the Python worker in 'loopback' mode and 'process' "
            "mode, unknown mode '%s' is configured" % mode)

    JPythonConfigUtil.configPythonOperator(self._j_stream_execution_environment)
    gateway.jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer.apply(
        self._j_stream_execution_environment)
    JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(
        get_field_value(self._j_stream_execution_environment, "transformations"))

    j_stream_graph = self._j_stream_execution_environment.getStreamGraph(clear_transformations)
    if job_name is not None:
        j_stream_graph.setJobName(job_name)
    return j_stream_graph
def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
        -> JavaObject:
    """Build and return the Java ``StreamGraph`` for this execution environment.

    Also starts the BeamFnLoopbackWorkerPoolServicer when the job executes in a
    MiniCluster, publishing its address via the ``PYFLINK_LOOPBACK_SERVER_ADDRESS``
    environment entry of the JVM.

    :param clear_transformations: whether the registered transformations are
        cleared after the stream graph is generated.
    :param job_name: optional job name to set on the generated stream graph.
    :return: the Java ``StreamGraph`` object.
    :raises ValueError: if loopback mode is requested for a non-local deployment,
        or if an unknown execution mode is configured.
    """
    gateway = get_gateway()
    JPythonConfigUtil = gateway.jvm.org.apache.flink.python.util.PythonConfigUtil
    # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
    j_configuration = get_j_env_configuration(self._j_stream_execution_environment)

    def _launch_loopback_server():
        # Lazy import: only needed when loopback mode is actually used.
        from pyflink.fn_execution.beam.beam_worker_pool_service import \
            BeamFnLoopbackWorkerPoolServicer
        jvm = gateway.jvm
        # Inject the server address into the JVM's (normally immutable)
        # environment map so the Java side can find the loopback server.
        j_env = jvm.System.getenv()
        get_field_value(j_env, "m").put(
            'PYFLINK_LOOPBACK_SERVER_ADDRESS',
            BeamFnLoopbackWorkerPoolServicer().start())

    mode = getattr(self, "_python_worker_execution_mode", None)

    if mode is None:
        if is_local_deployment(j_configuration):
            _launch_loopback_server()
    elif mode == 'loopback':
        if not is_local_deployment(j_configuration):
            raise ValueError(
                "Loopback mode is enabled, however the job wasn't configured to "
                "run in local deployment mode")
        _launch_loopback_server()
    elif mode != 'process':
        raise ValueError(
            "It only supports to execute the Python worker in 'loopback' mode and 'process' "
            "mode, unknown mode '%s' is configured" % mode)

    JPythonConfigUtil.configPythonOperator(self._j_stream_execution_environment)
    gateway.jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer.apply(
        self._j_stream_execution_environment)
    JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(
        get_field_value(self._j_stream_execution_environment, "transformations"))

    j_stream_graph = self._j_stream_execution_environment.getStreamGraph(clear_transformations)
    if job_name is not None:
        j_stream_graph.setJobName(job_name)
    return j_stream_graph
def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
        -> JavaObject:
    """Build and return the Java ``StreamGraph`` for this execution environment.

    When not in remote mode and the job is deployed locally (MiniCluster), this
    starts the BeamFnLoopbackWorkerPoolServicer and records its address in the
    job configuration, unless python archives are used with parallelism > 1
    (loopback cannot be used in that case).

    :param clear_transformations: whether the registered transformations are
        cleared after the stream graph is generated.
    :param job_name: optional job name to set on the generated stream graph.
    :return: the Java ``StreamGraph`` object.
    """
    gateway = get_gateway()
    JPythonConfigUtil = gateway.jvm.org.apache.flink.python.util.PythonConfigUtil

    # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
    j_configuration = get_j_env_configuration(self._j_stream_execution_environment)
    if not self._remote_mode and is_local_deployment(j_configuration):
        from pyflink.common import Configuration
        from pyflink.fn_execution.beam.beam_worker_pool_service import \
            BeamFnLoopbackWorkerPoolServicer

        jvm = gateway.jvm
        env_config = JPythonConfigUtil.getEnvironmentConfig(
            self._j_stream_execution_environment)
        parallelism = self.get_parallelism()
        if parallelism > 1 and env_config.containsKey(jvm.PythonOptions.PYTHON_ARCHIVES.key()):
            import logging
            # BUG FIX: the message previously said "Lookback mode"; the feature
            # is consistently called "Loopback mode" elsewhere in this module.
            logging.warning("Loopback mode is disabled as python archives are used and the "
                            "parallelism of the job is greater than 1. The Python user-defined "
                            "functions will be executed in an independent Python process.")
        else:
            config = Configuration(j_configuration=j_configuration)
            config.set_string(
                "loopback.server.address", BeamFnLoopbackWorkerPoolServicer().start())

    JPythonConfigUtil.configPythonOperator(self._j_stream_execution_environment)
    gateway.jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer.apply(
        self._j_stream_execution_environment)
    JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(
        get_field_value(self._j_stream_execution_environment, "transformations"))

    j_stream_graph = self._j_stream_execution_environment.getStreamGraph(clear_transformations)
    if job_name is not None:
        j_stream_graph.setJobName(job_name)
    return j_stream_graph