def run(self):
        """Connect to the Java gateway, register this executor, and block until interrupted."""
        gateway = self._initialize_gateway(self.gateway_address)
        if not gateway:
            log_error('Failed to initialize java gateway')
            return

        # The callback server binds an ephemeral port; ask the socket which one it got.
        # noinspection PyProtectedMember
        callback_port = gateway._callback_server.server_socket.getsockname()[1]
        spark_context, sql_context = self._initialize_spark_contexts(gateway)
        executor = CodeExecutor(spark_context, sql_context, gateway.entry_point)

        entry_point = gateway.entry_point
        try:
            entry_point.registerCallbackServerPort(callback_port)
            entry_point.registerCodeExecutor(executor)
        except Py4JError as e:
            log_error('Exception while registering codeExecutor, or callback server port: {}'.format(e))
            gateway.close()
            return

        # Wait for the end of the world
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            log_debug('Exiting on user\'s request')
            gateway.close()
# Example #2
    def _initialize_gateway(gateway_address):
        """Create a JavaGateway for (host, port) and import the Spark JVM classes.

        Returns the gateway on success, or None if any java_import fails
        (the gateway is closed before returning None).
        """
        host, port = gateway_address

        gateway = JavaGateway(
            GatewayClient(address=host, port=port),
            start_callback_server=True,
            auto_convert=True,
            # Port 0 lets the OS choose a free port for the callback server.
            callback_server_parameters=CallbackServerParameters(address=host, port=0))

        jvm_classes = (
            "org.apache.spark.SparkEnv",
            "org.apache.spark.SparkConf",
            "org.apache.spark.api.java.*",
            "org.apache.spark.api.python.*",
            "org.apache.spark.mllib.api.python.*",
            "org.apache.spark.sql.*",
            "org.apache.spark.sql.hive.*",
            "scala.Tuple2",
            "scala.collection.immutable.List",
        )
        try:
            for class_name in jvm_classes:
                java_import(gateway.jvm, class_name)
        except Py4JError as e:
            log_error('Error while initializing java gateway: {}'.format(e))
            gateway.close()
            return None

        log_debug('Java Gateway initialized {}'.format(gateway))
        return gateway
# Example #3
    def _initialize_spark_contexts(gateway):
        """Wrap the JVM-side SparkContext and SQL session in their Python counterparts.

        Raises ValueError for Spark versions outside the supported 2.0.0-2.2.0 set.
        """
        jsc = gateway.entry_point.getSparkContext()
        jconf = jsc.getConf()

        spark_context = SparkContext(
            conf=SparkConf(_jvm=gateway.jvm, _jconf=jconf),
            gateway=gateway,
            jsc=jsc)

        java_spark_sql_session = gateway.entry_point.getSparkSQLSession()
        spark_version = spark_context.version
        supported_versions = ("2.0.0", "2.0.1", "2.0.2", "2.1.0", "2.1.1", "2.2.0")
        if spark_version not in supported_versions:
            log_error(
                "Spark version {} is not supported".format(spark_version))
            raise ValueError(
                "Spark version {} is not supported".format(spark_version))

        # Deferred import: pyspark.sql.SparkSession only exists on Spark >= 2.0.
        from pyspark.sql import SparkSession
        java_spark_session = java_spark_sql_session.getSparkSession()
        spark_sql_session = SparkSession(spark_context, java_spark_session)

        return spark_context, spark_sql_session
    def _initialize_gateway(gateway_address):
        """Create a JavaGateway for (host, port) and import the Spark JVM classes.

        Returns the gateway on success, or None if any java_import fails
        (the gateway is closed before returning None).
        """
        (host, port) = gateway_address

        # Port 0 lets the OS choose a free port for the callback server.
        callback_params = CallbackServerParameters(address=host, port=0)

        gateway = JavaGateway(GatewayClient(address=host, port=port),
                              start_callback_server=True,
                              auto_convert=True,
                              callback_server_parameters=callback_params)
        try:
            java_import(gateway.jvm, "org.apache.spark.SparkEnv")
            java_import(gateway.jvm, "org.apache.spark.SparkConf")
            java_import(gateway.jvm, "org.apache.spark.api.java.*")
            java_import(gateway.jvm, "org.apache.spark.api.python.*")
            java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
            java_import(gateway.jvm, "org.apache.spark.sql.*")
            java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
            java_import(gateway.jvm, "scala.Tuple2")
            java_import(gateway.jvm, "scala.collection.immutable.List")
        except Py4JError as e:
            log_error('Error while initializing java gateway: {}'.format(e))
            gateway.close()
            return None

        # Consistency fix: log success like the other copy of this helper does,
        # so logs distinguish a successful initialization from a silent failure.
        log_debug('Java Gateway initialized {}'.format(gateway))
        return gateway
    def run(self):
        """Connect to the Java gateway, register this executor, and block until interrupted."""
        gateway = self._initialize_gateway(self.gateway_address)
        if not gateway:
            log_error('Failed to initialize java gateway')
            return

        # The callback server binds an ephemeral port; ask the socket which one it got.
        # noinspection PyProtectedMember
        callback_port = gateway._callback_server.server_socket.getsockname()[1]
        spark_context, spark_session = self._initialize_spark_contexts(gateway)
        executor = CodeExecutor(spark_context, spark_session,
                                gateway.entry_point)

        entry_point = gateway.entry_point
        try:
            entry_point.registerCallbackServerPort(callback_port)
            entry_point.registerCodeExecutor(executor)
        except Py4JError as e:
            log_error(
                'Exception while registering codeExecutor, or callback server port: {}'
                .format(e))
            gateway.close()
            return

        # Wait for the end of the world
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            log_debug('Exiting on user\'s request')
            gateway.close()
# Example #6
 def _supervised_execution(self, workflow_id, node_id,
                           custom_operation_code):
     """Run user-supplied code and report the outcome to the JVM entry point.

     Deliberately catches every exception: a failing custom operation must be
     reported via executionFailed instead of killing the executor thread.
     """
     # noinspection PyBroadException
     try:
         self._run_custom_code(workflow_id, node_id, custom_operation_code)
         self.entry_point.executionCompleted(workflow_id, node_id)
     except Exception:
         # 'as e' was unused: the full traceback is captured below instead.
         log_error('AN ERROR OCCURRED ==>')
         stacktrace = traceback.format_exc()
         log_error(stacktrace)
         self.entry_point.executionFailed(workflow_id, node_id, stacktrace)
         log_error('ERROR END')