#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from py4j.java_gateway import java_import, JavaGateway, GatewayClient

# start JVM gateway
client = GatewayClient(address='127.0.0.1', port=${JVM_GATEWAY_PORT})
gateway = JavaGateway(client)
java_import(gateway.jvm, "org.apache.zeppelin.display.Input")
intp = gateway.entry_point
import os
import warnings

from py4j.java_gateway import java_import, JavaGateway, GatewayClient
from pyspark.conf import SparkConf
from pyspark.context import SparkContext

# for back compatibility
from pyspark.sql import SQLContext

# start JVM gateway
if "PY4J_GATEWAY_SECRET" in os.environ:
    from py4j.java_gateway import GatewayParameters
    gateway_secret = os.environ["PY4J_GATEWAY_SECRET"]
    gateway = JavaGateway(gateway_parameters=GatewayParameters(
        address="${JVM_GATEWAY_ADDRESS}", port=${JVM_GATEWAY_PORT},
        auth_token=gateway_secret, auto_convert=True))
else:
    gateway = JavaGateway(GatewayClient(address="${JVM_GATEWAY_ADDRESS}",
                                        port=${JVM_GATEWAY_PORT}),
                          auto_convert=True)

java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "org.apache.spark.ml.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
java_import(gateway.jvm, "org.apache.spark.resource.*")

intp = gateway.entry_point

if intp.isSpark3():
    warnings.filterwarnings(action='ignore', module='pyspark.util')

jsc = intp.getJavaSparkContext()
            completionList.add(completionItem)
        if len(completionList) <= 0:
            self.interpreterObject.setStatementsFinished("", False)
        else:
            result = json.dumps(list(filter(
                lambda x: not re.match("^__.*", x), list(completionList))))
            self.interpreterObject.setStatementsFinished(result, False)


output = Logger()
sys.stdout = output
sys.stderr = output

client = GatewayClient(port=int(sys.argv[1]))
sparkVersion = SparkVersion(int(sys.argv[2]))

if sparkVersion.isSpark2():
    from pyspark.sql import SparkSession
else:
    from pyspark.sql import SchemaRDD

if sparkVersion.isAutoConvertEnabled():
    gateway = JavaGateway(client, auto_convert=True)
else:
    gateway = JavaGateway(client)

java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
def setUp(self):
    self.p = start_example_app_process()
    gateway_client = GatewayClient()
    self.gateway = JavaGateway()
    self.gateway.set_gateway_client(gateway_client)
""" Import a python class where its identity is not known until runtime. :param cls: The fully qualified path of the class including module prefixes, e.g. sparkjobserver.api.SparkJob :return: The constructor for the class, as a function which can be called to instantiate an instance. """ (module_name, class_name) = cls.rsplit('.', 1) module = import_module(module_name) c = getattr(module, class_name) return c if __name__ == "__main__": port = int(sys.argv[1]) gateway = JavaGateway(GatewayClient(port=port), auto_convert=True) entry_point = gateway.entry_point imports = entry_point.getPy4JImports() for i in imports: java_import(gateway.jvm, i) context_config =\ ConfigFactory.parse_string(entry_point.contextConfigAsHocon()) job_id = entry_point.jobId() job_env = JobEnvironment(job_id, None, context_config) job_config = ConfigFactory.parse_string(entry_point.jobConfigAsHocon()) job_class = import_class(entry_point.jobClass()) job = job_class() jcontext = entry_point.context() jspark_conf = entry_point.sparkConf()
def launch_gateway():
    SPARK_HOME = os.environ["SPARK_HOME"]
    gateway_port = -1
    if "PYSPARK_GATEWAY_PORT" in os.environ:
        gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
    else:
        # Launch the Py4j gateway using Spark's run command so that we pick up
        # the proper classpath and settings from spark-env.sh
        on_windows = platform.system() == "Windows"
        script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
        submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
        submit_args = submit_args if submit_args is not None else ""
        submit_args = shlex.split(submit_args)
        command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]

        if not on_windows:
            # Don't send ctrl-c / SIGINT to the Java gateway:
            def preexec_func():
                signal.signal(signal.SIGINT, signal.SIG_IGN)
            proc = Popen(command, stdout=PIPE, stdin=PIPE,
                         preexec_fn=preexec_func)
        else:
            # preexec_fn not supported on Windows
            proc = Popen(command, stdout=PIPE, stdin=PIPE)

        try:
            # Determine which ephemeral port the server started on:
            gateway_port = proc.stdout.readline()
            gateway_port = int(gateway_port)
        except ValueError:
            # Grab the remaining lines of stdout
            (stdout, _) = proc.communicate()
            exit_code = proc.poll()
            error_msg = "Launching GatewayServer failed"
            error_msg += " with exit code %d!\n" % exit_code if exit_code else "!\n"
            error_msg += "Warning: Expected GatewayServer to output a port, but found "
            if gateway_port == "" and stdout == "":
                error_msg += "no output.\n"
            else:
                error_msg += "the following:\n\n"
                error_msg += "--------------------------------------------------------------\n"
                error_msg += gateway_port + stdout
                error_msg += "--------------------------------------------------------------\n"
            raise Exception(error_msg)

        # In Windows, ensure the Java child processes do not linger after
        # Python has exited. In UNIX-based systems, the child process can kill
        # itself on broken pipe (i.e. when the parent process' stdin sends an
        # EOF). In Windows, however, this is not possible because
        # java.lang.Process reads directly from the parent process' stdin,
        # contending with any opportunity to read an EOF from the parent. Note
        # that this is only best effort and will not take effect if the python
        # process is violently terminated.
        if on_windows:
            # In Windows, the child process here is "spark-submit.cmd", not
            # the JVM itself (because the UNIX "exec" command is not
            # available). This means we cannot simply call proc.kill(), which
            # kills only the "spark-submit.cmd" process but not the JVMs.
            # Instead, we use "taskkill" with the tree-kill option "/t" to
            # terminate all child processes in the tree
            # (http://technet.microsoft.com/en-us/library/bb491009.aspx)
            def killChild():
                Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid",
                       str(proc.pid)])
            atexit.register(killChild)

        # Create a thread to echo output from the GatewayServer, which is
        # required for Java log output to show up:
        class EchoOutputThread(Thread):
            def __init__(self, stream):
                Thread.__init__(self)
                self.daemon = True
                self.stream = stream

            def run(self):
                while True:
                    line = self.stream.readline()
                    sys.stderr.write(line)

        EchoOutputThread(proc.stdout).start()

    # Connect to the gateway
    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=False)

    # Import the classes used by PySpark
    java_import(gateway.jvm, "org.apache.spark.SparkConf")
    java_import(gateway.jvm, "org.apache.spark.api.java.*")
    java_import(gateway.jvm, "org.apache.spark.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.sql.SQLContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.TestHiveContext")
    java_import(gateway.jvm, "scala.Tuple2")

    return gateway
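# --- Added usage sketch (not in the original file): a minimal smoke test for
# the launch_gateway() above, assuming SPARK_HOME is set; touching SparkConf
# is just one example of exercising the imported JVM classes.
#
#     gateway = launch_gateway()
#     print(gateway.jvm.org.apache.spark.SparkConf().toDebugString())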
import ast
import sys
from time import sleep

from py4j.java_gateway import java_import, JavaGateway, GatewayClient
from py4j.protocol import Py4JNetworkError
from pyspark import SparkConf
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, SQLContext

import logging
logging.basicConfig(filename='logs/python.log', level=logging.INFO)
logging.info('Starting python module for sparkle notebook')

# Connect to the gateway
gateway = JavaGateway(GatewayClient(port=int(sys.argv[1])), auto_convert=True)

# Import the classes used by PySpark
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "org.apache.spark.ml.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
# TODO(davies): move into sql
java_import(gateway.jvm, "org.apache.spark.sql.*")
java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
java_import(gateway.jvm, "scala.Tuple2")

python_kernel = gateway.entry_point

# auto generated variable counter
var_counter = 0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyspark import SparkContext
from pyspark import SparkConf
from pyspark.sql import SQLContext
from pyspark.sql import DataFrame
from py4j.java_gateway import JavaGateway, GatewayClient, java_import
from py4j.protocol import Py4JJavaError

# gateway_address and gateway_port are set in the kernel
gateway = JavaGateway(GatewayClient(address=gateway_address, port=gateway_port),
                      start_callback_server=False, auto_convert=True)
java_spark_context = gateway.entry_point.getSparkContext()
java_spark_conf = gateway.entry_point.getSparkConf()

java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
java_import(gateway.jvm, "org.apache.spark.sql.*")
java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
java_import(gateway.jvm, "scala.Tuple2")
java_import(gateway.jvm, "scala.collection.immutable.List")
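# --- Added sketch (not in the original file): a typical next step would be to
# wrap the JVM-side handles into PySpark objects, mirroring the pattern used
# in the Livy snippet further down; this is an assumption about intent, not
# the kernel's actual code.
#
#     conf = SparkConf(_jvm=gateway.jvm, _jconf=java_spark_conf)
#     sc = SparkContext(jsc=java_spark_context, gateway=gateway, conf=conf)
#     sqlContext = SQLContext(sc)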
        if replName is None:
            self.z.unregisterHook(event)
        else:
            self.z.unregisterHook(event, replName)

    def registerNoteHook(self, event, cmd, noteId, replName=None):
        if replName is None:
            self.z.registerNoteHook(event, cmd, noteId)
        else:
            self.z.registerNoteHook(event, cmd, noteId, replName)

    def unregisterNoteHook(self, event, noteId, replName=None):
        if replName is None:
            self.z.unregisterNoteHook(event, noteId)
        else:
            self.z.unregisterNoteHook(event, noteId, replName)


# start JVM gateway
if "PY4J_GATEWAY_SECRET" in os.environ:
    from py4j.java_gateway import GatewayParameters
    gateway_secret = os.environ["PY4J_GATEWAY_SECRET"]
    gateway = JavaGateway(gateway_parameters=GatewayParameters(
        port=${JVM_GATEWAY_PORT}, auth_token=gateway_secret,
        auto_convert=True))
else:
    gateway = JavaGateway(GatewayClient(port=${JVM_GATEWAY_PORT}),
                          auto_convert=True)

java_import(gateway.jvm, "org.apache.zeppelin.display.Input")
intp = gateway.entry_point
z = __zeppelin__ = PyZeppelinContext(intp.getZeppelinContext())
def get_departures(self):
    # set gzip header
    cherrypy.response.headers['Content-Type'] = 'application/gzip'
    # create the return tuple
    return_tuple = {}
    return_tuple['departures'] = []
    return_tuple['warning'] = ""
    return_tuple['error'] = ""
    translator = Translator(Config().get_param("default_language"))

    # parse json encoded input
    options = helper.convert_dict_values_to_utf8(cherrypy.request.json)

    # user language
    language = ""
    if "language" in options:
        language = options['language']
    # if the user sends a language which is not german, take the default
    # language setting
    if language != "de":
        language = Config().get_param("default_language")
    # initialize the translator object with the user's chosen language
    translator = Translator(language)

    # check latitude, longitude and vehicles parameters
    try:
        lat = float(options['lat'])
    except (KeyError, ValueError) as e:
        return_tuple['error'] = translator.translate(
            "message", "no_latitude_value")
        return helper.zip_data(return_tuple)
    try:
        lon = float(options['lon'])
    except (KeyError, ValueError) as e:
        return_tuple['error'] = translator.translate(
            "message", "no_longitude_value")
        return helper.zip_data(return_tuple)
    try:
        vehicles = options['vehicles'].split("+")
    except KeyError as e:
        vehicles = []

    # get the nearest stations for these coordinates and take the first one
    gateway = JavaGateway(
        GatewayClient(port=Config().get_param("gateway_port")),
        auto_field=True)
    main_point = gateway.entry_point
    closest_stations_result = main_point.getNearestStations(
        geometry.convert_coordinate_to_int(lat),
        geometry.convert_coordinate_to_int(lon))
    if closest_stations_result.status.toString() == "INVALID_STATION":
        return_tuple['error'] = translator.translate(
            "message", "no_station_for_this_coordinates")
        return helper.zip_data(return_tuple)
    if closest_stations_result.status.toString() == "SERVICE_DOWN":
        return_tuple['error'] = translator.translate(
            "message", "bahn_server_down")
        return helper.zip_data(return_tuple)
    if closest_stations_result.locations is None \
            or len(closest_stations_result.locations) == 0:
        return_tuple['error'] = translator.translate(
            "message", "no_station_for_this_coordinates")
        return helper.zip_data(return_tuple)

    # get departures for station
    sfinder = StationFinder(translator)
    station = sfinder.choose_station_by_vehicle_type(
        closest_stations_result.locations, lat, lon, vehicles)
    departures_result = main_point.getDepartures(station.id)
    date_format = gateway.jvm.java.text.SimpleDateFormat(
        "HH:mm", gateway.jvm.java.util.Locale.GERMAN)
    for station_departure in departures_result.stationDepartures:
        for departure in station_departure.departures:
            try:
                dep_entry = {}
                dep_entry['nr'] = "%s%s" % (departure.line.product.code,
                                            departure.line.label)
                dep_entry['to'] = departure.destination.name
                dep_entry['time'] = date_format.format(departure.plannedTime)
                # remaining time
                duration = departure.plannedTime.getTime() / 1000 \
                    - int(time.time())
                minutes, seconds = divmod(duration, 60)
                dep_entry['remaining'] = minutes
                return_tuple['departures'].append(dep_entry)
            except Exception as e:
                pass

    # convert return_tuple to json and zip it, before returning
    return helper.zip_data(return_tuple)
def setUp(self):
    self.p = start_example_app_process()
    # This is to ensure that the server is started before connecting to it!
    time.sleep(1)
    gateway_client = GatewayClient()
    self.gateway = JavaGateway(gateway_client=gateway_client)
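# --- Added sketch (not part of the original test): a fixed one-second sleep
# can be flaky on slow machines; polling until the JVM accepts connections is
# more robust. The default Py4J port 25333 is an assumption.

import socket
import time


def wait_for_gateway(host="127.0.0.1", port=25333, timeout=10.0):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # Succeeds only once the GatewayServer is accepting connections
            socket.create_connection((host, port), timeout=1).close()
            return True
        except OSError:
            time.sleep(0.1)
    return False  # timed out waiting for the gateway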
def start(self, cmd, num_containers=1, virtual_cores=1, memory=128,
          files=None, envvars=None, app_name="knit", queue="default",
          checks=True):
    """
    Method to start a yarn app with a distributed shell

    Parameters
    ----------
    cmd: str
        command to run in each yarn container
    num_containers: int
        Number of containers YARN should request (default: 1)
        * A container should be requested with the number of cores it can
          saturate, i.e. the average number of threads it expects to have
          runnable at a time.
    virtual_cores: int
        Number of virtual cores per container (default: 1)
        * A node's capacity should be configured with virtual cores equal to
          its number of physical cores.
    memory: int
        Memory per container (default: 128)
        * The unit for memory is megabytes.
    files: list
        list of files to be included in each container. If starting with
        `hdfs://`, assume these already exist in HDFS and don't need
        uploading. Otherwise, if hdfs3 is installed, existence of the file on
        HDFS will be checked to see if upload is needed. Files ending with
        `.zip` will be decompressed in the container before launch as a
        directory with the same name as the file: if myarc.zip contains files
        inside a directory stuff/, to the container they will appear at
        ./myarc.zip/stuff/* .
    envvars: dict
        Environment variables to pass to AM *and* workers. Both keys and
        values must be strings only.
    app_name: String
        Application name shown in YARN (default: "knit")
    queue: String
        RM Queue to use while scheduling (default: "default")
    checks: bool=True
        Whether to run pre-flight checks before submitting app to YARN

    Returns
    -------
    applicationId: str
        A yarn application ID string
    """
    files = files or []
    envvars = envvars or {'KNIT_LANG': self.lang}
    for k, v in envvars.items():
        if not isinstance(k, str) or not isinstance(v, str):
            raise ValueError('Environment must contain only strings (%s)'
                             % ((k, v),))
    if self.app_id:
        raise ValueError('Already started')
    if not isinstance(memory, int):
        raise KnitException("Memory argument must be an integer")
    if files:
        if not isinstance(files, list):
            raise KnitException("File argument must be a list of strings")

    if checks:
        self._pre_flight_checks(num_containers, virtual_cores, memory,
                                files, queue)

    # From https://github.com/apache/spark/blob/d83c2f9f0b08d6d5d369d9fae04cdb15448e7f0d/python/pyspark/java_gateway.py
    # thank you spark

    # Socket for PythonGatewayServer to communicate its port to us
    callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    callback_socket.bind(('127.0.0.1', 0))
    callback_socket.listen(1)
    callback_host, callback_port = callback_socket.getsockname()

    if not os.path.exists(self.JAR_FILE_PATH):
        raise KnitException('JAR file %s does not exist - please build'
                            ' with maven' % self.JAR_FILE_PATH)

    args = ["hadoop", "jar", self.JAR_FILE_PATH, self.JAVA_APP,
            "--callbackHost", str(callback_host),
            "--callbackPort", str(callback_port)]

    # Launch the Java gateway.
    # We open a pipe to stdin so that the Java gateway can die when the pipe
    # is broken
    if not on_windows:
        # Don't send ctrl-c / SIGINT to the Java gateway:
        def preexec_func():
            signal.signal(signal.SIGINT, signal.SIG_IGN)
        proc = Popen(args, stdin=PIPE, preexec_fn=preexec_func)
    else:
        # preexec_fn not supported on Windows
        proc = Popen(args, stdin=PIPE)
    self.proc = proc

    gateway_port = None
    # We use select() here in order to avoid blocking indefinitely if the
    # subprocess dies before connecting
    long_timeout = 60
    while gateway_port is None and proc.poll() is None and long_timeout > 0:
        timeout = 1  # (seconds)
        readable, _, _ = select.select([callback_socket], [], [], timeout)
        if callback_socket in readable:
            gateway_connection = callback_socket.accept()[0]
            # Determine which ephemeral port the server started on:
            gateway_port = read_int(gateway_connection.makefile(mode="rb"))
            gateway_connection.close()
            callback_socket.close()
        long_timeout -= 1
    if gateway_port is None:
        raise Exception("The JVM Knit client failed to launch successfully."
                        " Check that java is installed and the Knit JAR"
                        " file exists.")

    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True)
    self.client = gateway.entry_point
    self.client_gateway = gateway
    logger.debug("Files submitted: %s" % files)
    upfiles = [f for f in files if (not f.startswith('hdfs://')
                                    and self.check_needs_upload(f))]
    logger.debug("Files to upload: %s" % upfiles)
    jfiles = ListConverter().convert(upfiles, gateway._gateway_client)
    jenv = MapConverter().convert(envvars, gateway._gateway_client)
    self.app_id = self.client.start(jfiles, jenv, app_name, queue)

    # Wait for AM to appear
    long_timeout = 100
    master_rpcport = -1
    while master_rpcport == -1:
        master_rpcport = self.client.masterRPCPort()
        time.sleep(0.2)
        long_timeout -= 0.2
        if long_timeout < 0:
            break
    if master_rpcport in [-1, 'N/A']:
        raise Exception(
            """The application master JVM process failed to report back.
            This can mean:
             - that the YARN cluster cannot schedule adequate resources -
               check k.yarn_api.cluster_metrics() and other diagnostic
               methods;
             - that the ApplicationMaster crashed - check the application
               logs, k.logs();
             - that the cluster is otherwise unhealthy - check the RM and NN
               logs (use k.yarn_api.system_logs() to find these on a
               one-node system)""")
    master_rpchost = self.client.masterRPCHost()

    gateway = JavaGateway(GatewayClient(address=master_rpchost,
                                        port=master_rpcport),
                          auto_convert=True)
    self.master = gateway.entry_point
    rfiles = [triple_slash(f) if f.startswith('hdfs://')
              else '/'.join(['hdfs://', self.hdfs_home, '.knitDeps',
                             os.path.basename(f)])
              for f in files]
    logger.debug("Resource files: %s" % rfiles)
    jfiles = ListConverter().convert(rfiles, gateway._gateway_client)
    jenv = MapConverter().convert(envvars, gateway._gateway_client)
    self.master.init(jfiles, jenv, cmd, num_containers, virtual_cores,
                     memory)

    return self.app_id
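# --- Added usage sketch (not part of the original class): how the start()
# method above is typically invoked; the constructor arguments and the shell
# command are assumptions.
#
#     k = Knit()
#     app_id = k.start('sleep 60', num_containers=2, memory=256)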
        completionList = self.getMethodCompletion(objName, methodName)
        if completionList is None or len(completionList) <= 0:
            self.interpreter.setStatementsFinished("", False)
        else:
            result = json.dumps(list(filter(
                lambda x: not re.match("^__.*", x), list(completionList))))
            self.interpreter.setStatementsFinished(result, False)


host = sys.argv[1]
port = int(sys.argv[2])

client = GatewayClient(address=host, port=port)
gateway = JavaGateway(client, auto_convert=True)
intp = gateway.entry_point

# redirect stdout/stderr to the java side so that PythonInterpreter can
# capture the python execution result
output = Logger()
sys.stdout = output
sys.stderr = output

_zcUserQueryNameSpace = {}

completion = PythonCompletion(intp, _zcUserQueryNameSpace)
_zcUserQueryNameSpace["__zeppelin_completion__"] = completion
_zcUserQueryNameSpace["gateway"] = gateway

from zeppelin_context import PyZeppelinContext

if intp.getZeppelinContext():
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from py4j.java_gateway import java_import, JavaGateway, GatewayClient
from pyspark.conf import SparkConf
from pyspark.context import SparkContext

# for back compatibility
from pyspark.sql import SQLContext

# start JVM gateway
client = GatewayClient(port=${JVM_GATEWAY_PORT})
gateway = JavaGateway(client, auto_convert=True)

java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")

intp = gateway.entry_point
jsc = intp.getJavaSparkContext()

java_import(gateway.jvm, "org.apache.spark.sql.*")
java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
java_import(gateway.jvm, "scala.Tuple2")
def launch_gateway():
    SPARK_HOME = os.environ["SPARK_HOME"]
    gateway_port = -1
    if "PYSPARK_GATEWAY_PORT" in os.environ:
        gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
    else:
        # Launch the Py4j gateway using Spark's run command so that we pick up
        # the proper classpath and settings from spark-env.sh
        on_windows = platform.system() == "Windows"
        script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
        submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
        submit_args = submit_args if submit_args is not None else ""
        submit_args = shlex.split(submit_args)
        command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]

        if not on_windows:
            # Don't send ctrl-c / SIGINT to the Java gateway:
            def preexec_func():
                signal.signal(signal.SIGINT, signal.SIG_IGN)
            proc = Popen(command, stdout=PIPE, stdin=PIPE,
                         preexec_fn=preexec_func)
        else:
            # preexec_fn not supported on Windows
            proc = Popen(command, stdout=PIPE, stdin=PIPE)

        try:
            # Determine which ephemeral port the server started on:
            gateway_port = proc.stdout.readline()
            gateway_port = int(gateway_port)
        except ValueError:
            # Grab the remaining lines of stdout
            (stdout, _) = proc.communicate()
            exit_code = proc.poll()
            error_msg = "Launching GatewayServer failed"
            error_msg += " with exit code %d!\n" % exit_code if exit_code else "!\n"
            error_msg += "Warning: Expected GatewayServer to output a port, but found "
            if gateway_port == "" and stdout == "":
                error_msg += "no output.\n"
            else:
                error_msg += "the following:\n\n"
                error_msg += "--------------------------------------------------------------\n"
                error_msg += gateway_port + stdout
                error_msg += "--------------------------------------------------------------\n"
            raise Exception(error_msg)

        # Create a thread to echo output from the GatewayServer, which is
        # required for Java log output to show up:
        class EchoOutputThread(Thread):
            def __init__(self, stream):
                Thread.__init__(self)
                self.daemon = True
                self.stream = stream

            def run(self):
                while True:
                    line = self.stream.readline()
                    sys.stderr.write(line)

        EchoOutputThread(proc.stdout).start()

    # Connect to the gateway
    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=False)

    # Import the classes used by PySpark
    java_import(gateway.jvm, "org.apache.spark.SparkConf")
    java_import(gateway.jvm, "org.apache.spark.api.java.*")
    java_import(gateway.jvm, "org.apache.spark.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.sql.SQLContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.TestHiveContext")
    java_import(gateway.jvm, "scala.Tuple2")

    return gateway
        if QMessageBox.question(None, '', 'Are you sure you want to quit?',
                                QMessageBox.Yes | QMessageBox.No,
                                QMessageBox.No) == QMessageBox.Yes:
            QApplication.quit()

    def closeEvent(self, event):
        self.closing.emit()
        super(GameRacko, self).closeEvent(event)


if __name__ == "__main__":
    host = '127.0.0.1'
    port_number = 25333
    while port_number < 25335:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(('', 0))
        port_number = s.getsockname()[1]
        s.close()
    try:
        subprocess.Popen(['java', '-jar', 'RackoComputerPlayersGateway.jar',
                          str(port_number)])
        time.sleep(1)
    except:
        sys.exit()
    gateway_server = JavaGateway(GatewayClient(address=host, port=port_number))
    app = QApplication(sys.argv)
    window = GameRacko(gateway_server)
    window.show()
    while app.exec_() > 0:
        time.sleep(1)
    gateway_server.shutdown()
    sys.exit()
def main():
    sys_stdin = sys.stdin
    sys_stdout = sys.stdout
    sys_stderr = sys.stderr

    if sys.version >= '3':
        sys.stdin = io.StringIO()
    else:
        sys.stdin = cStringIO.StringIO()

    sys.stdout = UnicodeDecodingStringIO()
    sys.stderr = UnicodeDecodingStringIO()

    spark_major_version = os.getenv("LIVY_SPARK_MAJOR_VERSION")

    try:
        listening_port = 0
        if os.environ.get("LIVY_TEST") != "true":
            # Load spark into the context
            exec('from pyspark.shell import sc', global_dict)
            exec('from pyspark.shell import sqlContext', global_dict)
            exec('from pyspark.sql import HiveContext', global_dict)
            exec('from pyspark.streaming import StreamingContext', global_dict)
            exec('import pyspark.cloudpickle as cloudpickle', global_dict)

            if spark_major_version >= "2":
                exec('from pyspark.shell import spark', global_dict)

            # Start py4j callback server
            from py4j.protocol import ENTRY_POINT_OBJECT_ID
            from py4j.java_gateway import JavaGateway, GatewayClient, \
                CallbackServerParameters

            gateway_client_port = int(os.environ.get("PYSPARK_GATEWAY_PORT"))
            gateway = JavaGateway(GatewayClient(port=gateway_client_port))
            gateway.start_callback_server(
                callback_server_parameters=CallbackServerParameters(port=0))
            socket_info = gateway._callback_server.server_socket.getsockname()
            listening_port = socket_info[1]
            pyspark_job_processor = PySparkJobProcessorImpl()
            gateway.gateway_property.pool.dict[
                ENTRY_POINT_OBJECT_ID] = pyspark_job_processor

            global local_tmp_dir_path, job_context
            local_tmp_dir_path = tempfile.mkdtemp()
            job_context = JobContextImpl()

        print(sys.stdout.getvalue(), file=sys_stderr)
        print(sys.stderr.getvalue(), file=sys_stderr)
        clearOutputs()
        print('READY(port=' + str(listening_port) + ')', file=sys_stdout)
        sys_stdout.flush()

        while True:
            line = sys_stdin.readline()

            if line == '':
                break
            elif line == '\n':
                continue

            try:
                msg = json.loads(line)
            except ValueError:
                LOG.error('failed to parse message', exc_info=True)
                continue

            try:
                msg_type = msg['msg_type']
            except KeyError:
                LOG.error('missing message type', exc_info=True)
                continue

            try:
                content = msg['content']
            except KeyError:
                LOG.error('missing content', exc_info=True)
                continue

            if not isinstance(content, dict):
                LOG.error('content is not a dictionary')
                continue

            try:
                handler = msg_type_router[msg_type]
            except KeyError:
                LOG.error('unknown message type: %s', msg_type)
                continue

            response = handler(content)
            try:
                response = json.dumps(response)
            except ValueError:
                response = json.dumps({
                    'msg_type': 'inspect_reply',
                    'content': {
                        'status': 'error',
                        'ename': 'ValueError',
                        'evalue': 'cannot json-ify %s' % response,
                        'traceback': [],
                    }
                })
            print(response, file=sys_stdout)
            sys_stdout.flush()
    finally:
        if os.environ.get("LIVY_TEST") != "true" and 'sc' in global_dict:
            gateway.shutdown_callback_server()

        shutil.rmtree(local_tmp_dir_path)
        global_dict['sc'].stop()

        sys.stdin = sys_stdin
        sys.stdout = sys_stdout
        sys.stderr = sys_stderr
    def __init__(self, versionNumber):
        self.version = versionNumber

    def isAutoConvertEnabled(self):
        return self.version >= self.SPARK_1_4_0

    def isImportAllPackageUnderSparkSql(self):
        return self.version >= self.SPARK_1_3_0


output = Logger()
errorOutput = ErrorLogger()
sys.stdout = output
sys.stderr = errorOutput

try:
    client = GatewayClient(
        port=int(sys.argv[1]),
        gateway_parameters=GatewayParameters(port=int(sys.argv[1]),
                                             auto_convert=True,
                                             auth_token=sys.argv[3]))
except:
    client = GatewayClient(port=int(sys.argv[1]))

sparkVersion = SparkVersion(int(sys.argv[2]))

if sparkVersion.isAutoConvertEnabled():
    try:
        gateway = JavaGateway(
            client, auto_field=True, auto_convert=True,
            gateway_parameters=GatewayParameters(port=int(sys.argv[1]),
                                                 auto_convert=True,
                                                 auth_token=sys.argv[3]))
    except:
        gateway = JavaGateway(client, auto_convert=True)
else:
    gateway = JavaGateway(client)

java_import(gateway.jvm, "org.apache.spark.SparkEnv")
def launch_gateway(conf=None):
    """
    launch jvm gateway

    :param conf: configuration that ml_runner must have
    :return:
    """
    if "PYANGEL_GATEWAY_PORT" in os.environ:
        gateway_port = int(os.environ["PYANGEL_GATEWAY_PORT"])
    else:
        ANGEL_HOME = _find_angel_home()
        # Launch the Py4j gateway
        if os.environ.get("PYANGEL_LOCAL_MODE") == "True":
            script = "./bin/angel-local-submit"
        else:
            script = "./bin/angel-submit"
        command = [os.path.join(ANGEL_HOME, script)]
        if conf:
            for k, v in conf.getAll():
                command += ['--conf', '%s=%s' % (k, v)]
        submit_args = os.environ.get(
            "PYANGEL_SUBMIT_ARGS",
            "--angel.app.submit.class "
            "com.tencent.angel.api.python.PythonGatewayServer")
        command = command + shlex.split(submit_args)

        # Start a socket that will be used by PythonGatewayServer to
        # communicate its port to the python sub-process
        callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        callback_socket.bind(('127.0.0.1', 0))
        callback_socket.listen(1)
        callback_host, callback_port = callback_socket.getsockname()
        env = dict(os.environ)
        env['_PYANGEL_CALLBACK_HOST'] = callback_host
        env['_PYANGEL_CALLBACK_PORT'] = str(callback_port)

        # Don't send ctrl-c / SIGINT to the Java gateway:
        def preexec_func():
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)

        gateway_port = None
        # We use select() here in order to avoid blocking indefinitely if the
        # subprocess dies before connecting
        while gateway_port is None and proc.poll() is None:
            timeout = 1  # (seconds)
            readable, _, _ = select.select([callback_socket], [], [], timeout)
            if callback_socket in readable:
                gateway_connection = callback_socket.accept()[0]
                # Determine which ephemeral port the server started on:
                gateway_port = read_int(gateway_connection.makefile(mode="rb"))
                gateway_connection.close()
                callback_socket.close()
        if gateway_port is None:
            raise Exception("Java gateway process exited before sending the"
                            " driver its port number")

    # Connect to the gateway
    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True)

    return gateway
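# --- Added usage sketch (not in the original module): assumes a gateway is
# already reachable via PYANGEL_GATEWAY_PORT or can be forked by angel-submit.
#
#     gateway = launch_gateway()
#     print(gateway.jvm.java.lang.System.getProperty("java.version"))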
sys.path.insert(1, here)

import imp, traceback, __builtin__

try:
    import simplejson as json
except:
    import json

from ConfigParser import RawConfigParser
from StringIO import StringIO
from UserDict import DictMixin
from py4j.java_gateway import GatewayClient, JavaGateway

import vmsg, imgfx

try:
    pms
except NameError:
    host, port = os.environ['JGATEWAY'].split(':')
    hascb, cbport = (True, int(os.getenv('JCLIENT'))) \
        if 'JCLIENT' in os.environ else (False, None)
    gateway_client = GatewayClient(address=host, port=int(port))
    gateway = JavaGateway(gateway_client, start_callback_server=hascb,
                          python_proxy_port=cbport, auto_convert=True)
    __builtin__.pms = gateway.entry_point
    __builtin__.pms.gateway_client = gateway_client
    __builtin__.pms._addItem = pms.addItem
    __builtin__.pms._addPath = pms.addPath
    __builtin__.pms._setEnv = pms.setEnv
    __builtin__.pms._addPlayer = pms.addPlayer

    # constants from net.pms.formats.Format:
    __builtin__.PMS_AUDIO = 1
    __builtin__.PMS_IMAGE = 2
    __builtin__.PMS_VIDEO = 4
    __builtin__.PMS_UNKNOWN = 8
    __builtin__.PMS_PLAYLIST = 16
    __builtin__.PMS_ISO = 32
    __builtin__.PMS_CUSTOM = 64
def test_gateway_client(self):
    gateway_client = GatewayClient(port=DEFAULT_PORT)
    self.gateway = JavaGateway(gateway_client=gateway_client)
    i = self.gateway.jvm.System.currentTimeMillis()
    self.assertTrue(i > 0)
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from py4j.java_gateway import java_import, JavaGateway, GatewayClient

client = GatewayClient(port=%PORT%)
gateway = JavaGateway(client)
java_import(gateway.jvm, "org.apache.zeppelin.display.Input")


class PyZeppelinContext():
    paramOption = gateway.jvm.org.apache.zeppelin.display.Input.ParamOption
    javaList = gateway.jvm.java.util.ArrayList

    def __init__(self, zc):
        self.z = zc

    def input(self, name, defaultValue=""):
        return self.z.getGui().input(name, defaultValue)

    def select(self, name, options, defaultValue=""):
        javaOptions = gateway.new_array(self.paramOption, len(options))
            list(filter(lambda x: not re.match("^__.*", x),
                        list(completionList))))
        self.interpreter.setStatementsFinished(result, False)


host = sys.argv[1]
port = int(sys.argv[2])

if "PY4J_GATEWAY_SECRET" in os.environ:
    from py4j.java_gateway import GatewayParameters
    gateway_secret = os.environ["PY4J_GATEWAY_SECRET"]
    gateway = JavaGateway(gateway_parameters=GatewayParameters(
        address=host, port=port, auth_token=gateway_secret,
        auto_convert=True))
else:
    gateway = JavaGateway(GatewayClient(address=host, port=port),
                          auto_convert=True)

intp = gateway.entry_point

_zcUserQueryNameSpace = {}

completion = PythonCompletion(intp, _zcUserQueryNameSpace)
_zcUserQueryNameSpace["__zeppelin_completion__"] = completion
_zcUserQueryNameSpace["gateway"] = gateway

from zeppelin_context import PyZeppelinContext

if intp.getZeppelinContext():
    z = __zeppelin__ = PyZeppelinContext(intp.getZeppelinContext(), gateway)
    __zeppelin__._setup_matplotlib()
    _zcUserQueryNameSpace["z"] = z
    _zcUserQueryNameSpace["__zeppelin__"] = __zeppelin__
def start(self, cmd, num_containers=1, virtual_cores=1, memory=128, env="",
          files=[], app_name="knit", queue="default", checks=True,
          lang='C.UTF-8'):
    """
    Method to start a yarn app with a distributed shell

    Parameters
    ----------
    cmd: str
        command to run in each yarn container
    num_containers: int
        Number of containers YARN should request (default: 1)
        * A container should be requested with the number of cores it can
          saturate, i.e. the average number of threads it expects to have
          runnable at a time.
    virtual_cores: int
        Number of virtual cores per container (default: 1)
        * A node's capacity should be configured with virtual cores equal to
          its number of physical cores.
    memory: int
        Memory per container (default: 128)
        * The unit for memory is megabytes.
    env: string
        Full path to zipped Python environment
    files: list
        list of files to be included in each container
    app_name: String
        Application name shown in YARN (default: "knit")
    queue: String
        RM Queue to use while scheduling (default: "default")
    checks: bool=True
        Whether to run pre-flight checks before submitting app to YARN
    lang: str
        Environment variable language setting, required for ``click`` to
        successfully read from the shell.

    Returns
    -------
    applicationId: str
        A yarn application ID string
    """
    if self.app_id:
        raise ValueError('Already started')
    if not isinstance(memory, int):
        raise KnitException("Memory argument must be an integer")
    if files:
        if not isinstance(files, list):
            raise KnitException("File argument must be a list of strings")

    if checks:
        self._pre_flight_checks(num_containers, virtual_cores, memory,
                                env, files, queue)

    # From https://github.com/apache/spark/blob/d83c2f9f0b08d6d5d369d9fae04cdb15448e7f0d/python/pyspark/java_gateway.py
    # thank you spark

    # Start a socket that will be used by PythonGatewayServer to communicate
    # its port to us
    callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    callback_socket.bind(('127.0.0.1', 0))
    callback_socket.listen(1)
    callback_host, callback_port = callback_socket.getsockname()

    if not os.path.exists(self.JAR_FILE_PATH):
        raise KnitException('JAR file %s does not exist - please build'
                            ' with maven' % self.JAR_FILE_PATH)

    args = ["hadoop", "jar", self.JAR_FILE_PATH, self.JAVA_APP,
            "--callbackHost", str(callback_host),
            "--callbackPort", str(callback_port)]

    # Launch the Java gateway.
    # We open a pipe to stdin so that the Java gateway can die when the pipe
    # is broken
    if not on_windows:
        # Don't send ctrl-c / SIGINT to the Java gateway:
        def preexec_func():
            signal.signal(signal.SIGINT, signal.SIG_IGN)
        proc = Popen(args, stdin=PIPE, preexec_fn=preexec_func)
    else:
        # preexec_fn not supported on Windows
        proc = Popen(args, stdin=PIPE)
    self.proc = proc

    gateway_port = None
    # We use select() here in order to avoid blocking indefinitely if the
    # subprocess dies before connecting
    long_timeout = 60
    while gateway_port is None and proc.poll() is None and long_timeout > 0:
        timeout = 1  # (seconds)
        readable, _, _ = select.select([callback_socket], [], [], timeout)
        if callback_socket in readable:
            gateway_connection = callback_socket.accept()[0]
            # Determine which ephemeral port the server started on:
            gateway_port = read_int(gateway_connection.makefile(mode="rb"))
            gateway_connection.close()
            callback_socket.close()
        long_timeout -= 1
    if gateway_port is None:
        raise Exception("The JVM Knit client failed to launch successfully."
                        " Check that java is installed and the Knit JAR"
                        " file exists.")

    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True)
    self.client = gateway.entry_point
    self.client_gateway = gateway
    upload = self.check_env_needs_upload(env)
    self.app_id = self.client.start(env, ','.join(files), app_name, queue,
                                    str(upload), lang)

    long_timeout = 100
    master_rpcport = -1
    while master_rpcport == -1:
        master_rpcport = self.client.masterRPCPort()
        time.sleep(0.2)
        long_timeout -= 0.2
        if long_timeout < 0:
            break
    if master_rpcport in [-1, 'N/A']:
        raise Exception(
            """The application master JVM process failed to report back.
            This can mean:
             - that the YARN cluster cannot schedule adequate resources -
               check k.yarn_api.cluster_metrics() and other diagnostic
               methods;
             - that the ApplicationMaster crashed - check the application
               logs, k.logs();
             - that the cluster is otherwise unhealthy - check the RM and NN
               logs (use k.yarn_api.system_logs() to find these on a
               one-node system)""")
    master_rpchost = self.client.masterRPCHost()

    gateway = JavaGateway(GatewayClient(address=master_rpchost,
                                        port=master_rpcport),
                          auto_convert=True)
    self.master = gateway.entry_point
    self.master.init(env, ','.join(files), cmd, num_containers,
                     virtual_cores, memory)

    return self.app_id
connectionTimeout = 15
while port_number < 25335:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(('', 0))
    port_number = s.getsockname()[1]
    s.close()
try:
    p = subprocess.Popen(['java', '-jar', 'FifteenPuzzleGateway.jar',
                          str(port_number)])
    count = 0
    print("Connecting to server. Please wait.")
    while count < connectionTimeout:
        time.sleep(1)
        gateway_server = JavaGateway(
            GatewayClient(address=host, port=port_number))
        count += 1
        connected = True
        try:
            gateway_server.entry_point.isConnected()
        except:
            connected = False
        if connected:
            break
        elif count % 2 == 0 and count < connectionTimeout:
            print(str(count) + " seconds passed, continue to wait.")
    if not connected:
        print("Connection timed out after " + str(connectionTimeout) +
              " seconds")
        gateway_server.shutdown()
        p.kill()
    def __init__(self, bigdl_type, port=25333):
        self.value = JavaGateway(GatewayClient(port=port), auto_convert=True)
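# --- Added usage sketch (not in the original file): the enclosing class name
# is not shown above, so this connects directly with the same arguments; it
# assumes a Py4J GatewayServer is already listening on the default port 25333.
#
#     gateway = JavaGateway(GatewayClient(port=25333), auto_convert=True)
#     print(gateway.jvm.java.lang.System.currentTimeMillis())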
def launch_gateway(conf=None):
    """
    launch jvm gateway

    :param conf: spark configuration passed to spark-submit
    :return:
    """
    if "PYSPARK_GATEWAY_PORT" in os.environ:
        gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
    else:
        SPARK_HOME = _find_spark_home()
        # Launch the Py4j gateway using Spark's run command so that we pick up
        # the proper classpath and settings from spark-env.sh
        on_windows = platform.system() == "Windows"
        script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
        command = [os.path.join(SPARK_HOME, script)]
        if conf:
            for k, v in conf.getAll():
                command += ['--conf', '%s=%s' % (k, v)]
        submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
        if os.environ.get("SPARK_TESTING"):
            submit_args = ' '.join(["--conf spark.ui.enabled=false",
                                    submit_args])
        command = command + shlex.split(submit_args)

        # Start a socket that will be used by PythonGatewayServer to
        # communicate its port to us
        callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        callback_socket.bind(('127.0.0.1', 0))
        callback_socket.listen(1)
        callback_host, callback_port = callback_socket.getsockname()
        env = dict(os.environ)
        env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
        env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)

        # Launch the Java gateway.
        # We open a pipe to stdin so that the Java gateway can die when the
        # pipe is broken
        if not on_windows:
            # Don't send ctrl-c / SIGINT to the Java gateway:
            def preexec_func():
                signal.signal(signal.SIGINT, signal.SIG_IGN)
            proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
        else:
            # preexec_fn not supported on Windows
            proc = Popen(command, stdin=PIPE, env=env)

        gateway_port = None
        # We use select() here in order to avoid blocking indefinitely if the
        # subprocess dies before connecting
        while gateway_port is None and proc.poll() is None:
            timeout = 1  # (seconds)
            readable, _, _ = select.select([callback_socket], [], [], timeout)
            if callback_socket in readable:
                gateway_connection = callback_socket.accept()[0]
                # Determine which ephemeral port the server started on:
                gateway_port = read_int(gateway_connection.makefile(mode="rb"))
                gateway_connection.close()
                callback_socket.close()
        if gateway_port is None:
            raise Exception("Java gateway process exited before sending the"
                            " driver its port number")

        # In Windows, ensure the Java child processes do not linger after
        # Python has exited. In UNIX-based systems, the child process can kill
        # itself on broken pipe (i.e. when the parent process' stdin sends an
        # EOF). In Windows, however, this is not possible because
        # java.lang.Process reads directly from the parent process' stdin,
        # contending with any opportunity to read an EOF from the parent. Note
        # that this is only best effort and will not take effect if the python
        # process is violently terminated.
        if on_windows:
            # In Windows, the child process here is "spark-submit.cmd", not
            # the JVM itself (because the UNIX "exec" command is not
            # available). This means we cannot simply call proc.kill(), which
            # kills only the "spark-submit.cmd" process but not the JVMs.
            # Instead, we use "taskkill" with the tree-kill option "/t" to
            # terminate all child processes in the tree
            # (http://technet.microsoft.com/en-us/library/bb491009.aspx)
            def killChild():
                Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid",
                       str(proc.pid)])
            atexit.register(killChild)

    # Connect to the gateway
    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True)

    # Import the classes used by PySpark
    java_import(gateway.jvm, "org.apache.spark.SparkConf")
    java_import(gateway.jvm, "org.apache.spark.api.java.*")
    java_import(gateway.jvm, "org.apache.spark.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.ml.python.*")
    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
    # TODO(davies): move into sql
    java_import(gateway.jvm, "org.apache.spark.sql.*")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
    java_import(gateway.jvm, "scala.Tuple2")

    return gateway
def main():
    sys_stdin = sys.stdin
    sys_stdout = sys.stdout
    sys_stderr = sys.stderr

    if sys.version >= '3':
        sys.stdin = io.StringIO()
    else:
        sys.stdin = cStringIO.StringIO()

    sys.stdout = UnicodeDecodingStringIO()
    sys.stderr = UnicodeDecodingStringIO()

    spark_major_version = os.getenv("LIVY_SPARK_MAJOR_VERSION")

    try:
        listening_port = 0
        if os.environ.get("LIVY_TEST") != "true":
            # Load spark into the context
            exec('from pyspark.sql import HiveContext', global_dict)
            exec('from pyspark.streaming import StreamingContext', global_dict)
            exec('import pyspark.cloudpickle as cloudpickle', global_dict)

            from py4j.java_gateway import java_import, JavaGateway, \
                GatewayClient
            from pyspark.conf import SparkConf
            from pyspark.context import SparkContext
            from pyspark.sql import SQLContext, HiveContext, Row

            # Connect to the gateway
            gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
            try:
                from py4j.java_gateway import GatewayParameters
                gateway_secret = os.environ["PYSPARK_GATEWAY_SECRET"]
                gateway = JavaGateway(gateway_parameters=GatewayParameters(
                    port=gateway_port, auth_token=gateway_secret,
                    auto_convert=True))
            except:
                gateway = JavaGateway(GatewayClient(port=gateway_port),
                                      auto_convert=True)

            # Import the classes used by PySpark
            java_import(gateway.jvm, "org.apache.spark.SparkConf")
            java_import(gateway.jvm, "org.apache.spark.api.java.*")
            java_import(gateway.jvm, "org.apache.spark.api.python.*")
            java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
            java_import(gateway.jvm, "org.apache.spark.sql.*")
            java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
            java_import(gateway.jvm, "scala.Tuple2")

            jsc = gateway.entry_point.sc()
            jconf = gateway.entry_point.sc().getConf()
            jsqlc = gateway.entry_point.hivectx() \
                if gateway.entry_point.hivectx() is not None \
                else gateway.entry_point.sqlctx()

            conf = SparkConf(_jvm=gateway.jvm, _jconf=jconf)
            sc = SparkContext(jsc=jsc, gateway=gateway, conf=conf)
            global_dict['sc'] = sc

            if spark_major_version >= "2":
                from pyspark.sql import SparkSession
                spark_session = SparkSession(sc,
                                             gateway.entry_point.sparkSession())
                sqlc = SQLContext(sc, spark_session, jsqlc)
                global_dict['sqlContext'] = sqlc
                global_dict['spark'] = spark_session
            else:
                sqlc = SQLContext(sc, jsqlc)
                global_dict['sqlContext'] = sqlc

                # LIVY-294, need to check whether HiveContext can work
                # properly, fallback to SQLContext if HiveContext can not be
                # initialized successfully. Only for spark-1.
                code = textwrap.dedent("""
                    import py4j
                    from pyspark.sql import SQLContext
                    try:
                        sqlContext.tables()
                    except py4j.protocol.Py4JError:
                        sqlContext = SQLContext(sc)""")
                exec(code, global_dict)

            # Start py4j callback server
            from py4j.protocol import ENTRY_POINT_OBJECT_ID
            from py4j.java_gateway import CallbackServerParameters

            try:
                gateway_secret = os.environ["PYSPARK_GATEWAY_SECRET"]
                gateway.start_callback_server(
                    callback_server_parameters=CallbackServerParameters(
                        port=0, auth_token=gateway_secret))
            except:
                gateway.start_callback_server(
                    callback_server_parameters=CallbackServerParameters(
                        port=0))
            socket_info = gateway._callback_server.server_socket.getsockname()
            listening_port = socket_info[1]
            pyspark_job_processor = PySparkJobProcessorImpl()
            gateway.gateway_property.pool.dict[
                ENTRY_POINT_OBJECT_ID] = pyspark_job_processor

            global local_tmp_dir_path, job_context
            local_tmp_dir_path = tempfile.mkdtemp()
            job_context = JobContextImpl()

        print(sys.stdout.getvalue(), file=sys_stderr)
        print(sys.stderr.getvalue(), file=sys_stderr)
        clearOutputs()
        print('READY(port=' + str(listening_port) + ')', file=sys_stdout)
        sys_stdout.flush()

        while True:
            line = sys_stdin.readline()

            if line == '':
                break
            elif line == '\n':
                continue

            try:
                msg = json.loads(line)
            except ValueError:
                LOG.error('failed to parse message', exc_info=True)
                continue

            try:
                msg_type = msg['msg_type']
            except KeyError:
                LOG.error('missing message type', exc_info=True)
                continue

            try:
                content = msg['content']
            except KeyError:
                LOG.error('missing content', exc_info=True)
                continue

            if not isinstance(content, dict):
                LOG.error('content is not a dictionary')
                continue

            try:
                handler = msg_type_router[msg_type]
            except KeyError:
                LOG.error('unknown message type: %s', msg_type)
                continue

            response = handler(content)
            try:
                response = json.dumps(response)
            except ValueError:
                response = json.dumps({
                    'msg_type': 'inspect_reply',
                    'content': {
                        'status': 'error',
                        'ename': 'ValueError',
                        'evalue': 'cannot json-ify %s' % response,
                        'traceback': [],
                    }
                })
            print(response, file=sys_stdout)
            sys_stdout.flush()
    finally:
        if os.environ.get("LIVY_TEST") != "true" and 'sc' in global_dict:
            gateway.shutdown_callback_server()

        shutil.rmtree(local_tmp_dir_path)
        global_dict['sc'].stop()

        sys.stdin = sys_stdin
        sys.stdout = sys_stdout
        sys.stderr = sys_stderr
    matplotlib.use('Agg')
    warnings.warn("Unable to load inline matplotlib backend, "
                  "falling back to Agg")


def handler_stop_signals(sig, frame):
    sys.exit("Got signal : " + str(sig))


signal.signal(signal.SIGINT, handler_stop_signals)

host = "127.0.0.1"
if len(sys.argv) >= 3:
    host = sys.argv[2]

client = GatewayClient(address=host, port=int(sys.argv[1]))
# gateway = JavaGateway(client, auto_convert=True)
gateway = JavaGateway(client)

intp = gateway.entry_point
intp.onPythonScriptInitialized(os.getpid())

z = PyZeppelinContext()
z._setup_matplotlib()

output = Logger()
sys.stdout = output
# sys.stderr = output

while True:
def launch_gateway(host=None, port=None):
    global _isremote
    global _forked_proc

    requesthost = socket.gethostname()
    requestport = 0

    # Launch the Py4j gateway using the MrGeo command so that we pick up the
    # proper classpath
    fork = True
    if host is not None and port is not None:
        requesthost = host
        requestport = port
        fork = False
    else:
        if "MRGEO_HOST" in os.environ:
            requesthost = os.environ["MRGEO_HOST"]
            fork = False
        if "MRGEO_PORT" in os.environ:
            requestport = int(os.environ["MRGEO_PORT"])
            fork = False
        if port is not None and requestport == 0:
            requestport = port

    # If we didn't get a request port, get one. We open a socket to make sure
    # we get an unused port, without guessing.
    if requestport == 0:
        tmp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        tmp_socket.settimeout(0.01)
        tmp_socket.bind((requesthost, 0))
        # tmp_socket.listen(1)
        name, requestport = tmp_socket.getsockname()
        tmp_socket.close()

    if fork:
        # Start a socket that will be used by PythonGatewayServer to
        # communicate its port to us
        script = find_script()
        # command = [script, "python", "-v", "-p", str(requestport)]
        command = [script, "python", "-p", str(requestport)]

        environ = os.environ
        # Add some more memory
        environ['HADOOP_CLIENT_OPTS'] = '-Xmx12G ' + environ.get(
            'HADOOP_CLIENT_OPTS', '')
        # Allow remote debugging
        # environ['HADOOP_CLIENT_OPTS'] = '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 ' + environ.get('HADOOP_CLIENT_OPTS', '')

        # Launch the Java gateway.
        # We open a pipe to stdin so that the Java gateway can die when the
        # pipe is broken
        # Don't send ctrl-c / SIGINT to the Java gateway:
        def preexec_func():
            os.setsid()
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        _forked_proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func,
                             env=environ, bufsize=1, universal_newlines=True)

        # while True:
        #     out = _forked_proc.stdout.read(1)
        #
        #     # print("[" + out + "] " + str(_forked_proc.poll()))
        #     if out != '':
        #         break
        #
        #     if _forked_proc.poll() is not None:
        #         raise Exception("Java gateway process exited before sending"
        #                         " the driver its port number: returned: " +
        #                         str(_forked_proc.poll()))
        #     time.sleep(5)

        # We use select() here in order to avoid blocking indefinitely if the
        # subprocess dies before connecting
        # while proc.poll() is None:
        #     pass
        # _forked_proc.stdout = subprocess.STDOUT

        atexit.register(terminate)

    timeout = 30  # (seconds)
    request_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    request_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    start = time.time()
    connected = -1
    while (time.time() - start) < timeout and connected != 0:
        connected = request_socket.connect_ex((requesthost, requestport))
        time.sleep(0.5)

    if connected != 0:
        raise Exception("Could not connect to the java gateway process")

    readable, writable, error = select.select([request_socket], [], [],
                                              timeout)

    # read the communication ports from the server
    if request_socket in readable:
        data = ""
        while len(data) < 8:  # two 4-byte ints (8 bytes total)
            data += request_socket.recv(8)
        java_python_port, python_java_port = struct.unpack("!ii", data)
        request_socket.close()
    else:
        raise Exception("Port is not readable")

    _isremote = not fork

    if java_python_port is None:
        raise Exception("Java gateway process exited before sending the"
                        " driver its port number")

    print("Talking with MrGeo on port " + str(java_python_port))

    # Connect to the gateway
    gateway_client = GatewayClient(address=requesthost, port=java_python_port)
    gateway = JavaGateway(gateway_client=gateway_client, auto_convert=True,
                          python_proxy_port=python_java_port)

    # Import the classes used by MrGeo
    java_import(gateway.jvm, "org.mrgeo.python.*")

    # Import classes used by Spark
    java_import(gateway.jvm, "org.apache.spark.SparkConf")
    java_import(gateway.jvm, "org.apache.spark.api.java.*")
    java_import(gateway.jvm, "org.apache.spark.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")

    return gateway, gateway_client
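# --- Added usage sketch (not in the original module): connect to an
# already-running MrGeo gateway instead of forking one; the host and port
# values are assumptions.
#
#     gateway, gateway_client = launch_gateway(host="localhost", port=12345)
#     print(gateway.jvm.java.lang.System.getProperty("java.version"))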