Example #1
0
    def _open(self):
        """
        Starts the BeamFnLoopbackWorkerPoolServicer when the job is a local deployment.

        The worker execution mode is read from the ``_python_worker_execution_mode``
        environment variable:

        - not set: the loopback server is started only for a local deployment.
        - ``'loopback'``: the loopback server is started; a non-local deployment is rejected.
        - ``'process'``: nothing is started here.

        When the server is started, the value returned by its ``start()`` call is written
        to the ``python.loopback-server.address`` key of the environment configuration.

        :raises ValueError: If 'loopback' is requested for a non-local deployment, or if the
                            configured mode is neither 'loopback' nor 'process'.
        """
        j_conf = get_j_env_configuration(
            self._j_stream_execution_environment)
        mode = os.environ.get('_python_worker_execution_mode')

        if mode == 'process':
            # Process mode never needs the loopback server.
            return

        if mode is not None and mode != 'loopback':
            raise ValueError(
                "It only supports to execute the Python worker in 'loopback' mode and 'process' "
                "mode, unknown mode '%s' is configured" %
                mode)

        # From here on the mode is either unset or explicitly 'loopback'.
        if not is_local_deployment(j_conf):
            if mode == 'loopback':
                raise ValueError(
                    "Loopback mode is enabled, however the job wasn't configured to "
                    "run in local deployment mode")
            return

        # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
        from pyflink.common import Configuration
        from pyflink.fn_execution.beam.beam_worker_pool_service import \
            BeamFnLoopbackWorkerPoolServicer
        config = Configuration(j_configuration=j_conf)
        server_address = BeamFnLoopbackWorkerPoolServicer().start()
        config.set_string("python.loopback-server.address", server_address)
    def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
            -> JavaObject:
        """
        Generates the Java stream graph of the wrapped Java execution environment.

        Before the graph is generated, the loopback worker pool server may be started,
        depending on the optional ``_python_worker_execution_mode`` attribute of this object:

        - missing or None: loopback is attempted only for a local deployment.
        - ``'loopback'``: loopback is attempted; a non-local deployment is rejected.
        - ``'process'``: loopback is never attempted.

        An attempt is skipped with a warning when the parallelism is greater than 1 and the
        environment config contains the python archives key. Otherwise the server is started
        and its address is stored under ``PYFLINK_LOOPBACK_SERVER_ADDRESS`` in the map held
        by the ``m`` field of the object returned by the JVM's ``System.getenv()``.

        :param clear_transformations: Forwarded to ``getStreamGraph`` of the Java environment.
        :param job_name: If not None, it is set as the job name of the generated stream graph.
        :return: The Java stream graph object.
        :raises ValueError: If 'loopback' is requested for a non-local deployment, or if the
                            configured mode is neither 'loopback' nor 'process'.
        """
        gateway = get_gateway()
        jvm = gateway.jvm
        JPythonConfigUtil = jvm.org.apache.flink.python.util.PythonConfigUtil
        j_env = self._j_stream_execution_environment
        j_configuration = get_j_env_configuration(j_env)

        # Step 1: decide whether the loopback server should be attempted at all.
        mode = getattr(self, "_python_worker_execution_mode", None)
        if mode is None:
            try_loopback = is_local_deployment(j_configuration)
        elif mode == 'loopback':
            if not is_local_deployment(j_configuration):
                raise ValueError("Loopback mode is enabled, however the job wasn't configured to "
                                 "run in local deployment mode")
            try_loopback = True
        elif mode == 'process':
            try_loopback = False
        else:
            raise ValueError(
                "It only supports to execute the Python worker in 'loopback' mode and 'process' "
                "mode, unknown mode '%s' is configured" % mode)

        # Step 2: start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
        if try_loopback:
            env_config = JPythonConfigUtil.getEnvironmentConfig(j_env)
            if self.get_parallelism() > 1 and \
                    env_config.containsKey(jvm.PythonOptions.PYTHON_ARCHIVES.key()):
                import logging
                logging.warning("Loopback mode is disabled as python archives are used and the "
                                "parallelism of the job is greater than 1. The Python user-defined "
                                "functions will be executed in an independent Python process.")
            else:
                from pyflink.fn_execution.beam.beam_worker_pool_service import \
                    BeamFnLoopbackWorkerPoolServicer
                # The address is published through the "m" field of the JVM environment map.
                j_env_map = get_field_value(jvm.System.getenv(), "m")
                j_env_map.put(
                    'PYFLINK_LOOPBACK_SERVER_ADDRESS', BeamFnLoopbackWorkerPoolServicer().start())

        # Step 3: prepare the Python operators and build the stream graph.
        JPythonConfigUtil.configPythonOperator(j_env)
        jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer.apply(j_env)
        JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(
            get_field_value(j_env, "transformations"))

        j_stream_graph = j_env.getStreamGraph(clear_transformations)
        if job_name is not None:
            j_stream_graph.setJobName(job_name)
        return j_stream_graph
    def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
            -> JavaObject:
        """
        Generates the Java stream graph of the wrapped Java execution environment.

        Depending on the optional ``_python_worker_execution_mode`` attribute of this object,
        the BeamFnLoopbackWorkerPoolServicer is started first:

        - missing or None: started only for a local deployment.
        - ``'loopback'``: started; a non-local deployment is rejected.
        - ``'process'``: never started.

        The started server's address is stored under ``PYFLINK_LOOPBACK_SERVER_ADDRESS`` in
        the map held by the ``m`` field of the object returned by the JVM's
        ``System.getenv()``.

        :param clear_transformations: Forwarded to ``getStreamGraph`` of the Java environment.
        :param job_name: If not None, it is set as the job name of the generated stream graph.
        :return: The Java stream graph object.
        :raises ValueError: If 'loopback' is requested for a non-local deployment, or if the
                            configured mode is neither 'loopback' nor 'process'.
        """
        gateway = get_gateway()
        JPythonConfigUtil = gateway.jvm.org.apache.flink.python.util.PythonConfigUtil
        j_configuration = get_j_env_configuration(
            self._j_stream_execution_environment)

        def launch_loopback_server():
            # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
            from pyflink.fn_execution.beam.beam_worker_pool_service import \
                BeamFnLoopbackWorkerPoolServicer
            j_env_map = get_field_value(gateway.jvm.System.getenv(), "m")
            j_env_map.put(
                'PYFLINK_LOOPBACK_SERVER_ADDRESS',
                BeamFnLoopbackWorkerPoolServicer().start())

        mode = getattr(self, "_python_worker_execution_mode", None)

        # Reject unknown modes before touching the deployment configuration.
        if mode is not None and mode != 'loopback' and mode != 'process':
            raise ValueError(
                "It only supports to execute the Python worker in 'loopback' mode and 'process' "
                "mode, unknown mode '%s' is configured" %
                mode)

        if mode != 'process':
            # The mode is unset or 'loopback'; only the latter insists on a local deployment.
            if is_local_deployment(j_configuration):
                launch_loopback_server()
            elif mode == 'loopback':
                raise ValueError(
                    "Loopback mode is enabled, however the job wasn't configured to "
                    "run in local deployment mode")

        j_env = self._j_stream_execution_environment
        JPythonConfigUtil.configPythonOperator(j_env)

        optimizer = gateway.jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer
        optimizer.apply(j_env)

        transformations = get_field_value(j_env, "transformations")
        JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(transformations)

        j_stream_graph = j_env.getStreamGraph(clear_transformations)
        if job_name is not None:
            j_stream_graph.setJobName(job_name)
        return j_stream_graph
Example #4
0
    def _generate_stream_graph(self, clear_transformations: bool = False, job_name: str = None) \
            -> JavaObject:
        """
        Generates the Java stream graph of the wrapped Java execution environment.

        When this environment is not in remote mode and the job is a local deployment, the
        BeamFnLoopbackWorkerPoolServicer is started and the value returned by its ``start()``
        call is written to the ``loopback.server.address`` key of the environment
        configuration. This is skipped with a warning when the parallelism is greater than 1
        and the environment config contains the python archives key.

        :param clear_transformations: Forwarded to ``getStreamGraph`` of the Java environment.
        :param job_name: If not None, it is set as the job name of the generated stream graph.
        :return: The Java stream graph object.
        """
        gateway = get_gateway()
        JPythonConfigUtil = gateway.jvm.org.apache.flink.python.util.PythonConfigUtil
        # start BeamFnLoopbackWorkerPoolServicer when executed in MiniCluster
        j_configuration = get_j_env_configuration(self._j_stream_execution_environment)
        if not self._remote_mode and is_local_deployment(j_configuration):
            jvm = gateway.jvm
            env_config = JPythonConfigUtil.getEnvironmentConfig(
                self._j_stream_execution_environment)
            parallelism = self.get_parallelism()
            if parallelism > 1 and env_config.containsKey(jvm.PythonOptions.PYTHON_ARCHIVES.key()):
                import logging
                # The message previously read "Lookback mode"; the feature is loopback mode.
                logging.warning("Loopback mode is disabled as python archives are used and the "
                                "parallelism of the job is greater than 1. The Python user-defined "
                                "functions will be executed in an independent Python process.")
            else:
                # Imported here because only this branch starts the loopback server.
                from pyflink.common import Configuration
                from pyflink.fn_execution.beam.beam_worker_pool_service import \
                    BeamFnLoopbackWorkerPoolServicer

                config = Configuration(j_configuration=j_configuration)
                config.set_string(
                    "loopback.server.address", BeamFnLoopbackWorkerPoolServicer().start())

        JPythonConfigUtil.configPythonOperator(self._j_stream_execution_environment)

        gateway.jvm.org.apache.flink.python.chain.PythonOperatorChainingOptimizer.apply(
            self._j_stream_execution_environment)

        JPythonConfigUtil.setPartitionCustomOperatorNumPartitions(
            get_field_value(self._j_stream_execution_environment, "transformations"))

        j_stream_graph = self._j_stream_execution_environment.getStreamGraph(clear_transformations)
        if job_name is not None:
            j_stream_graph.setJobName(job_name)
        return j_stream_graph