def run(self):
    """Connect to the Java gateway, register the code executor, then block.

    The method initializes the gateway from ``self.gateway_address``, builds
    the Spark contexts, and registers both the callback server port and a
    ``CodeExecutor`` on the gateway entry point. It then sleeps in a loop
    until a ``KeyboardInterrupt`` arrives.

    Returns:
        None. Returns early (after logging) when the gateway cannot be
        initialized or when registration raises ``Py4JError``.

    Raises:
        Whatever ``_initialize_spark_contexts`` or ``CodeExecutor`` raise;
        the gateway is closed before such an exception propagates.
    """
    gateway = self._initialize_gateway(self.gateway_address)
    if not gateway:
        log_error('Failed to initialize java gateway')
        return

    # From here on the gateway is open, so every exit path (normal return,
    # registration failure, or an unexpected exception) must close it.
    try:
        # The callback server is created with port 0 by _initialize_gateway,
        # so the port actually bound has to be read back from the socket.
        # noinspection PyProtectedMember
        callback_server_port = gateway._callback_server.server_socket.getsockname()[1]

        spark_context, sql_context = self._initialize_spark_contexts(gateway)
        code_executor = CodeExecutor(spark_context, sql_context, gateway.entry_point)

        try:
            gateway.entry_point.registerCallbackServerPort(callback_server_port)
            gateway.entry_point.registerCodeExecutor(code_executor)
        except Py4JError as e:
            log_error('Exception while registering codeExecutor, or callback server port: {}'.format(e))
            return

        # Wait for the end of the world
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            log_debug('Exiting on user\'s request')
    finally:
        gateway.close()
def _initialize_gateway(gateway_address):
    """Create a Py4J gateway and import the Spark/Scala names into its JVM view.

    NOTE(review): there is no ``self`` parameter although ``run`` calls this
    as ``self._initialize_gateway(...)``; presumably a ``@staticmethod``
    decorator sits directly above this definition -- confirm.

    Args:
        gateway_address: ``(host, port)`` pair of the Java gateway.

    Returns:
        The ``JavaGateway`` instance, or ``None`` if any ``java_import``
        raised ``Py4JError`` (the gateway is closed in that case).
    """
    host, port = gateway_address

    # The callback server is requested on port 0; run() later reads the
    # real port back from the callback server socket.
    callback_settings = CallbackServerParameters(address=host, port=0)
    client = GatewayClient(address=host, port=port)
    gateway = JavaGateway(client,
                          start_callback_server=True,
                          auto_convert=True,
                          callback_server_parameters=callback_settings)

    # Imported in this exact order, one java_import call per entry.
    jvm_names = (
        "org.apache.spark.SparkEnv",
        "org.apache.spark.SparkConf",
        "org.apache.spark.api.java.*",
        "org.apache.spark.api.python.*",
        "org.apache.spark.mllib.api.python.*",
        "org.apache.spark.sql.*",
        "org.apache.spark.sql.hive.*",
        "scala.Tuple2",
        "scala.collection.immutable.List",
    )

    try:
        for jvm_name in jvm_names:
            java_import(gateway.jvm, jvm_name)
    except Py4JError as e:
        log_error('Error while initializing java gateway: {}'.format(e))
        gateway.close()
        return None

    log_debug('Java Gateway initialized {}'.format(gateway))
    return gateway
def _initialize_spark_contexts(gateway):
    """Wrap the JVM-side Spark context and SQL session in PySpark objects.

    NOTE(review): there is no ``self`` parameter although ``run`` calls this
    as ``self._initialize_spark_contexts(...)``; presumably a
    ``@staticmethod`` decorator sits directly above -- confirm.

    Args:
        gateway: gateway whose ``entry_point`` exposes ``getSparkContext()``
            and ``getSparkSQLSession()``.

    Returns:
        A ``(SparkContext, SparkSession)`` tuple.

    Raises:
        ValueError: if the running Spark version is not one of the
            explicitly listed versions.
    """
    supported_versions = ("2.0.0", "2.0.1", "2.0.2", "2.1.0", "2.1.1", "2.2.0")

    jsc = gateway.entry_point.getSparkContext()
    jconf = jsc.getConf()
    python_conf = SparkConf(_jvm=gateway.jvm, _jconf=jconf)
    spark_context = SparkContext(conf=python_conf, gateway=gateway, jsc=jsc)

    # Fetched before the version check, as in the original call sequence.
    jvm_sql_session = gateway.entry_point.getSparkSQLSession()
    version = spark_context.version

    if version not in supported_versions:
        message = "Spark version {} is not supported".format(version)
        log_error(message)
        raise ValueError(message)

    # Imported lazily so the import only happens for a supported version.
    from pyspark.sql import SparkSession
    java_spark_session = jvm_sql_session.getSparkSession()
    return spark_context, SparkSession(spark_context, java_spark_session)
def _initialize_gateway(gateway_address):
    """Build a Py4J ``JavaGateway`` and pre-import Spark and Scala names.

    NOTE(review): no ``self`` parameter is declared although ``run`` invokes
    this through ``self``; presumably decorated with ``@staticmethod``
    outside this block -- confirm.

    Args:
        gateway_address: ``(host, port)`` pair of the Java gateway.

    Returns:
        The ``JavaGateway`` on success. ``None`` when a ``java_import``
        raised ``Py4JError``; the error is logged and the gateway closed.
    """
    host, port = gateway_address

    # Port 0 is passed for the callback server; run() reads the bound port
    # back from the callback server socket afterwards.
    callback_settings = CallbackServerParameters(address=host, port=0)
    client = GatewayClient(address=host, port=port)
    gateway = JavaGateway(client,
                          start_callback_server=True,
                          auto_convert=True,
                          callback_server_parameters=callback_settings)

    # One java_import call per entry, in this order.
    names_to_import = (
        "org.apache.spark.SparkEnv",
        "org.apache.spark.SparkConf",
        "org.apache.spark.api.java.*",
        "org.apache.spark.api.python.*",
        "org.apache.spark.mllib.api.python.*",
        "org.apache.spark.sql.*",
        "org.apache.spark.sql.hive.*",
        "scala.Tuple2",
        "scala.collection.immutable.List",
    )

    try:
        for name in names_to_import:
            java_import(gateway.jvm, name)
    except Py4JError as e:
        log_error('Error while initializing java gateway: {}'.format(e))
        gateway.close()
        return None

    return gateway
def run(self):
    """Start the executor: connect, register with the JVM, block until interrupted.

    Steps:
      1. Initialize the Java gateway from ``self.gateway_address``.
      2. Build the Spark context and Spark session for that gateway.
      3. Register the callback server port and a ``CodeExecutor`` on the
         gateway entry point.
      4. Sleep in a loop until ``KeyboardInterrupt``.

    Returns:
        None. Returns early (after logging) if the gateway could not be
        initialized or if registration raises ``Py4JError``.

    Raises:
        Any exception from ``_initialize_spark_contexts`` or
        ``CodeExecutor``; the gateway is closed before it propagates.
    """
    gateway = self._initialize_gateway(self.gateway_address)
    if not gateway:
        log_error('Failed to initialize java gateway')
        return

    # The gateway is open past this point; the finally clause guarantees it
    # is closed on every exit path, including unexpected exceptions.
    try:
        # The callback server was requested on port 0, so the port that was
        # actually bound is read back from its socket.
        # noinspection PyProtectedMember
        callback_server_port = gateway._callback_server.server_socket.getsockname()[1]

        spark_context, spark_session = self._initialize_spark_contexts(gateway)
        code_executor = CodeExecutor(spark_context, spark_session, gateway.entry_point)

        try:
            gateway.entry_point.registerCallbackServerPort(callback_server_port)
            gateway.entry_point.registerCodeExecutor(code_executor)
        except Py4JError as e:
            log_error('Exception while registering codeExecutor, or callback server port: {}'.format(e))
            return

        # Wait for the end of the world
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            log_debug('Exiting on user\'s request')
    finally:
        gateway.close()
def _supervised_execution(self, workflow_id, node_id, custom_operation_code):
    """Run custom code and report success or failure to the entry point.

    On success ``self.entry_point.executionCompleted`` is called. If running
    the code or reporting completion raises any ``Exception``, the formatted
    traceback is logged and handed to ``self.entry_point.executionFailed``.

    Args:
        workflow_id: identifier forwarded to the runner and the entry point.
        node_id: identifier forwarded to the runner and the entry point.
        custom_operation_code: code passed to ``self._run_custom_code``.
    """
    # The broad catch is deliberate: any failure must be reported back
    # through executionFailed instead of escaping this method.
    # noinspection PyBroadException
    try:
        self._run_custom_code(workflow_id, node_id, custom_operation_code)
        self.entry_point.executionCompleted(workflow_id, node_id)
    except Exception:
        log_error('AN ERROR OCCURED ==>')
        failure_trace = traceback.format_exc()
        log_error(failure_trace)
        self.entry_point.executionFailed(workflow_id, node_id, failure_trace)
        log_error('ERROR END')