Ejemplo n.º 1
0
    def _ensure_initialized(cls):
        """Prepare the shared Py4J gateway for streaming use.

        Initializes SparkContext, imports the streaming packages into the
        gateway's JVM view, starts the Python callback server if the gateway
        does not hold one yet, and stores a ``TransformFunctionSerializer`` on
        ``cls._transformerSerializer``.
        """
        SparkContext._ensure_initialized()
        gateway = SparkContext._gateway

        for package in ("org.apache.spark.streaming.*",
                        "org.apache.spark.streaming.api.java.*",
                        "org.apache.spark.streaming.api.python.*"):
            java_import(gateway.jvm, package)

        # Inspect the instance __dict__ directly: attribute access on the
        # gateway falls back to the JVM, so hasattr() cannot be used here.
        server_missing = "_callback_server" not in gateway.__dict__
        if server_missing or gateway._callback_server is None:
            params = gateway.callback_server_parameters
            params.eager_load = True
            params.daemonize = True
            params.daemonize_connections = True
            params.port = 0
            gateway.start_callback_server(params)

            # Read the port the server socket is actually bound to and record
            # it on both the callback server and the gateway.
            server = gateway._callback_server
            bound_port = server.server_socket.getsockname()[1]
            server.port = bound_port
            gateway._python_proxy_port = server.port

            # Fetch the JVM-side GatewayServer by its ID and point its
            # CallbackClient at the real port.
            jvm_gateway_server = JavaObject("GATEWAY_SERVER", gateway._gateway_client)
            callback_address = jvm_gateway_server.getCallbackClient().getAddress()
            jvm_gateway_server.resetCallbackClient(
                callback_address, gateway._python_proxy_port)

        # Register the serializer for TransformFunction. When loading from a
        # checkpoint this runs before a SparkContext has been created.
        active_context = SparkContext._active_spark_context
        pickler = CloudPickleSerializer()
        cls._transformerSerializer = TransformFunctionSerializer(
            active_context, pickler, gateway)
Ejemplo n.º 2
0
 def __get_random_port_for_callback_server(self) -> None:
     """Start the gateway's callback server and report its port to the JVM.

     The server is started with ``port=0`` and daemonized threads; the port
     it ends up listening on is then pushed to the JVM-side CallbackClient.
     """
     # See: https://github.com/bartdag/py4j/issues/147
     gateway = self._gateway
     server_params = CallbackServerParameters(
         port=0, daemonize=True, daemonize_connections=True)
     gateway.start_callback_server(server_params)

     # Look up the JVM-side GatewayServer by ID and reset its callback client
     # so that it uses the port the Python server is listening on.
     jvm_gateway_server = JavaObject("GATEWAY_SERVER", gateway._gateway_client)
     callback_address = jvm_gateway_server.getCallbackClient().getAddress()
     listening_port = gateway.get_callback_server().get_listening_port()
     jvm_gateway_server.resetCallbackClient(callback_address, listening_port)
Ejemplo n.º 3
0
    def _ensure_initialized(cls):
        """Set up everything the streaming API needs on the Py4J gateway.

        Steps, in order: initialize SparkContext, import the streaming Java
        packages, start the callback server unless the gateway already has
        one, and assign ``cls._transformerSerializer``.
        """
        SparkContext._ensure_initialized()
        py4j_gateway = SparkContext._gateway

        java_import(py4j_gateway.jvm, "org.apache.spark.streaming.*")
        java_import(py4j_gateway.jvm, "org.apache.spark.streaming.api.java.*")
        java_import(py4j_gateway.jvm, "org.apache.spark.streaming.api.python.*")

        # hasattr() is not usable here because getattr falls back to the JVM,
        # so the instance __dict__ is checked instead.
        if ("_callback_server" not in py4j_gateway.__dict__
                or py4j_gateway._callback_server is None):
            server_params = py4j_gateway.callback_server_parameters
            for flag in ("eager_load", "daemonize", "daemonize_connections"):
                setattr(server_params, flag, True)
            server_params.port = 0
            py4j_gateway.start_callback_server(server_params)

            # Store the port the server socket reports on the callback server,
            # then mirror it onto the gateway.
            callback_server = py4j_gateway._callback_server
            callback_server.port = callback_server.server_socket.getsockname()[1]
            py4j_gateway._python_proxy_port = callback_server.port

            # The JVM GatewayServer is addressed by its ID; its CallbackClient
            # is reset to use the real port.
            gateway_server = JavaObject("GATEWAY_SERVER", py4j_gateway._gateway_client)
            client_address = gateway_server.getCallbackClient().getAddress()
            gateway_server.resetCallbackClient(
                client_address, py4j_gateway._python_proxy_port)

        # The TransformFunction serializer is registered here; when loading
        # from checkpointing this happens before a SparkContext is created.
        cls._transformerSerializer = TransformFunctionSerializer(
            SparkContext._active_spark_context,
            CloudPickleSerializer(),
            py4j_gateway,
        )
Ejemplo n.º 4
0
def _ensure_callback_gateway_initialized(gw):
    """Start and configure the Python callback server on ``gw`` if needed.

    Does nothing when ``gw`` already holds a non-None ``_callback_server``.
    Source: ``pyspark/streaming/context.py`` in ``StreamingContext._ensure_initialized``
    """
    # hasattr() cannot be used: getattr on the gateway falls back to the JVM,
    # so the instance __dict__ is consulted directly.
    if "_callback_server" in gw.__dict__ and gw._callback_server is not None:
        return

    params = gw.callback_server_parameters
    params.eager_load = True
    params.daemonize = True
    params.daemonize_connections = True
    params.port = 0
    gw.start_callback_server(params)

    # Record the port reported by the server socket on the callback server
    # and on the gateway.
    server = gw._callback_server
    bound_port = server.server_socket.getsockname()[1]
    server.port = bound_port
    gw._python_proxy_port = server.port

    # Get the JVM-side GatewayServer by ID and update its CallbackClient to
    # use the real port.
    jvm_gateway_server = JavaObject("GATEWAY_SERVER", gw._gateway_client)
    callback_address = jvm_gateway_server.getCallbackClient().getAddress()
    jvm_gateway_server.resetCallbackClient(callback_address, gw._python_proxy_port)
Ejemplo n.º 5
0
def ensure_callback_server_started(gw):
    """
    Start the callback server on ``gw`` unless one is already running.

    The callback server is required whenever the Java driver process has to
    call back into the Python driver process to execute Python code. When
    ``gw`` already holds a non-None ``_callback_server`` this is a no-op.
    """
    # getattr on the gateway falls back to the JVM, which rules out hasattr();
    # look at the instance __dict__ instead.
    already_started = (
        "_callback_server" in gw.__dict__ and gw._callback_server is not None
    )
    if already_started:
        return

    server_params = gw.callback_server_parameters
    for flag in ("eager_load", "daemonize", "daemonize_connections"):
        setattr(server_params, flag, True)
    server_params.port = 0
    gw.start_callback_server(server_params)

    # Copy the port reported by the server socket onto the callback server,
    # then onto the gateway.
    callback_server = gw._callback_server
    callback_server.port = callback_server.server_socket.getsockname()[1]
    gw._python_proxy_port = callback_server.port

    # Resolve the JVM GatewayServer by its ID and reset its CallbackClient
    # so that it targets the real port.
    gateway_server = JavaObject("GATEWAY_SERVER", gw._gateway_client)
    client_address = gateway_server.getCallbackClient().getAddress()
    gateway_server.resetCallbackClient(client_address, gw._python_proxy_port)
Ejemplo n.º 6
0
def ensure_callback_server_started(gw):
    """
    Make sure the Py4J callback server of ``gw`` is up.

    Java driver code that needs to invoke Python code in the Python driver
    process does so through this callback server. Nothing happens if ``gw``
    already has a ``_callback_server`` that is not None.
    """
    # The instance __dict__ is checked rather than hasattr(), because getattr
    # on the gateway falls back to the JVM.
    if "_callback_server" in gw.__dict__ and gw._callback_server is not None:
        return

    params = gw.callback_server_parameters
    params.eager_load = True
    params.daemonize = True
    params.daemonize_connections = True
    params.port = 0
    gw.start_callback_server(params)

    # Take the port from the server socket and store it on the callback
    # server and on the gateway.
    server = gw._callback_server
    real_port = server.server_socket.getsockname()[1]
    server.port = real_port
    gw._python_proxy_port = server.port

    # Obtain the GatewayServer object in the JVM by ID and update the port of
    # its CallbackClient.
    jvm_gateway_server = JavaObject("GATEWAY_SERVER", gw._gateway_client)
    address = jvm_gateway_server.getCallbackClient().getAddress()
    jvm_gateway_server.resetCallbackClient(address, gw._python_proxy_port)