def _do_not_call_start_session(self, line, cell="", local_ns=None):
    # Starts a session unless session is already created or a fatal error occurred. Returns True when session is
    # created successfully.
    # No need to add the handle_expected_exceptions decorator to this since we manually catch all
    # exceptions when starting the session.
    if self.fatal_error:
        self.ipython_display.send_error(self.fatal_error_message)
        return False

    if not self.session_started:
        skip = False
        properties = conf.get_session_properties(self.language)
        self.session_started = True

        try:
            self.spark_controller.add_session(self.session_name, self.endpoint, skip, properties)
        except Exception as e:
            self.fatal_error = True
            self.fatal_error_message = conf.fatal_error_suggestion().format(e)
            self.logger.error(u"Error creating session: {}".format(e))
            self.ipython_display.send_error(self.fatal_error_message)
            return False

    return self.session_started

def run(self):
    try:
        properties_json = self.properties.value
        if properties_json.strip() != "":
            conf.override(conf.session_configs.__name__, json.loads(properties_json))
    except ValueError as e:
        self.ipython_display.send_error("Session properties must be a valid JSON string. Error:\n{}".format(e))
        return

    endpoint = self.endpoints_dropdown_widget.value
    language = self.lang_widget.value
    alias = self.session_widget.value
    skip = False
    properties = conf.get_session_properties(language)

    try:
        self.spark_controller.add_session(alias, endpoint, skip, properties)
    except ValueError as e:
        self.ipython_display.send_error("""Could not add session with
name: {}
properties: {}

due to error: '{}'""".format(alias, properties, e))
        return

    self.refresh_method()

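# A minimal sketch of the value the properties widget above accepts: a JSON object
# string that json.loads can parse before it is merged into conf.session_configs.
# The keys shown (driverMemory, executorCores) come from the %spark docstring
# examples later in this file; the literal values are illustrative.
import json

example_properties = '{"driverMemory": "1000M", "executorCores": 4}'
parsed = json.loads(example_properties)  # raises ValueError on malformed JSON
assert parsed["executorCores"] == 4
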
def info(self, line, cell=u"", local_ns=None):
    parse_argstring_or_throw(self.info, line)
    self._assure_cell_body_is_empty(KernelMagics.info.__name__, cell)
    if self.session_started:
        current_session_id = self.spark_controller.get_session_id_for_client(self.session_name)
    else:
        current_session_id = None

    self.ipython_display.html(u"Current session configs: <tt>{}</tt><br>".format(
        conf.get_session_properties(self.language)))

    info_sessions = self.spark_controller.get_all_sessions_endpoint(self.endpoint)
    self._print_endpoint_info(info_sessions, current_session_id)

def _do_not_call_start_session(self, line, cell="", local_ns=None):
    # Starts a session unless session is already created or a fatal error occurred. Returns True when session is
    # created successfully.
    # No need to add the handle_expected_exceptions decorator to this since we manually catch all
    # exceptions when starting the session.
    if self.fatal_error:
        self.ipython_display.send_error(self.fatal_error_message)
        return False

    if not self.session_started:
        skip = False
        properties = conf.get_session_properties(self.language)
        self.session_started = True

        try:
            self.spark_controller.add_session(self.session_name, self.endpoint, skip, properties)
        except Exception as e:
            self.fatal_error = True
            self.fatal_error_message = conf.fatal_error_suggestion().format(e)
            self.logger.error(u"Error creating session: {}".format(e))
            self.ipython_display.send_error(self.fatal_error_message)
            return False

        ## ^^ signifyd_initialization
        # If SESSION_INIT_FILE points at a script, run its contents once in the newly created session.
        session_init_file = os.environ.get("SESSION_INIT_FILE")
        if session_init_file:
            session_init_file = os.path.expanduser(session_init_file)
            if os.path.exists(session_init_file):
                with open(session_init_file) as filep:
                    self.logger.info(
                        "Initialize session with %s. session_name=%s, endpoint=%s"
                        % (session_init_file, self.session_name, self.endpoint))
                    code = filep.read()
                    (success, out) = self.spark_controller.run_command(
                        Command(code), self.session_name)
                    self.logger.info("return %s => %s" % (success, out))
        ## $$

    return self.session_started

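# A hedged sketch of exercising the SESSION_INIT_FILE hook above: point the
# environment variable at a script, and its contents run once in the freshly
# created Livy session. The path and the script body are hypothetical.
import os
import tempfile

init_script = tempfile.NamedTemporaryFile("w", suffix=".py", delete=False)
init_script.write('spark.conf.set("spark.sql.shuffle.partitions", "200")\n')  # example init code
init_script.close()
os.environ["SESSION_INIT_FILE"] = init_script.name  # picked up on next session start
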
def __get_or_create_session(self, language):
    proxy_user = getpass.getuser()
    self.session_language = language
    endpoint = build_endpoint(self.session_language)
    kernel_instance_id = id(self.shell.kernel)
    session_name_selected = self.spark_controller.generate_livy_session_name(kernel_instance_id)
    properties = conf.get_session_properties(self.session_language)
    properties["proxyUser"] = proxy_user
    properties["session_language"] = self.session_language
    properties["session_name"] = session_name_selected

    session_info_list = self.spark_controller.get_all_sessions_endpoint(endpoint)
    for session in session_info_list:
        # The session kind must match.
        if session.kind != properties['kind']:
            continue
        # Distinguish between pyspark and pyspark3.
        if session.session_language != properties['session_language']:
            continue
        session_name = self._get_session_name_by_session(session)
        if session_name == session_name_selected:
            if session.status in constants.HEALTHY_SESSION_STATUS:
                return session_name_selected
            elif session.status in constants.FINAL_STATEMENT_STATUS:
                # The session reached a final state; recreate it.
                self.spark_controller.add_session(session_name_selected, endpoint, False, properties)
                return session_name_selected

    # No matching session exists in Livy, so create one.
    self.spark_controller.add_session(session_name_selected, endpoint, False, properties)
    return session_name_selected

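# A minimal sketch of the reuse rule __get_or_create_session applies, using
# stand-in session records. The attribute names mirror those read above; the
# status value and the helper are illustrative assumptions, not sparkmagic's own.
from collections import namedtuple

FakeSession = namedtuple("FakeSession", ["kind", "session_language", "name", "status"])

def reusable(session, properties, wanted_name):
    # A session is reused only when kind, language, and generated name all match.
    return (session.kind == properties["kind"]
            and session.session_language == properties["session_language"]
            and session.name == wanted_name)

props = {"kind": "pyspark", "session_language": "python3"}
candidate = FakeSession("pyspark", "python3", "livy-session-42", "idle")
assert reusable(candidate, props, "livy-session-42")
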
def run(self):
    try:
        properties_json = self.properties.value
        if properties_json.strip() != "":
            conf.override(conf.session_configs.__name__, json.loads(properties_json))
    except ValueError as e:
        self.ipython_display.send_error(
            "Session properties must be a valid JSON string. Error:\n{}".format(e))
        return

    language = self.lang_widget.value
    alias = self.session_widget.value
    skip = False
    properties = conf.get_session_properties(language)
    properties["name"] = alias
    properties["conf"]["spark.kubernetes.file.upload.path"] = conf.s3_bucket()
    properties["conf"]["spark.hadoop.fs.s3a.access.key"] = conf.s3_access_key()
    properties["conf"]["spark.hadoop.fs.s3a.secret.key"] = conf.s3_secret_key()

    try:
        self.spark_controller.add_session(alias, self.endpoint, skip, properties)
    except ValueError as e:
        self.ipython_display.send_error("""Could not add session with
name: {}
properties: {}

due to error: '{}'""".format(alias, properties, e))
        return

    self.refresh_method()

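# For orientation, a hedged sketch of the payload the run() above ends up sending
# to Livy: base fields from conf.get_session_properties plus the three S3 entries
# written into properties["conf"]. Every concrete value below is made up for
# illustration.
example_payload = {
    "kind": "pyspark",       # from conf.get_session_properties(language)
    "name": "my-session",    # the session alias
    "conf": {
        "spark.kubernetes.file.upload.path": "s3a://example-bucket/uploads",  # conf.s3_bucket()
        "spark.hadoop.fs.s3a.access.key": "EXAMPLEACCESSKEY",                 # conf.s3_access_key()
        "spark.hadoop.fs.s3a.secret.key": "EXAMPLESECRETKEY",                 # conf.s3_secret_key()
    },
}
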
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
    be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

    If invoked with no subcommand, the cell will be executed against the specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for sessions.
    add
        Add a Livy session given a session name (-s), language (-l), and endpoint credentials.
        The -k argument, if present, will skip adding this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation. All session creations will
        contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
        endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
        previously created and store the pandas dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments required.
        e.g. `%spark cleanup`
    """
    usage = "Please look at usage of %spark by executing `%spark?`."
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)

    subcommand = args.command[0].lower()

    if args.auth is None:
        args.auth = conf.get_auth_value(args.user, args.password)

    # info
    if subcommand == "info":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.auth, args.user, args.password)
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            self._print_endpoint_info(info_sessions)
        else:
            self._print_local_info()
    # config
    elif subcommand == "config":
        conf.override(conf.session_configs.__name__, json.loads(cell))
    # encoded: endpoint credentials and session configs supplied as a base64-encoded JSON blob
    elif subcommand == "encoded":
        language = args.language
        session = args.session
        conf_json = json.loads(base64.b64decode(args.encodedconf).decode('utf-8'))
        lang_args = conf_json['kernel_{}_credentials'.format(language)]
        url = lang_args['url']
        auth = lang_args['auth']
        username = lang_args['username']
        password = lang_args['base64_password']
        conf.override_all(conf_json)
        properties = conf.get_session_properties(language)
        if url is not None:
            endpoint = Endpoint(url, auth, username, password)
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            # Check whether the session already exists before adding it.
            if session in info_sessions:
                print("found session")
            else:
                self.spark_controller.add_session(session, endpoint, True, properties)
        coerce = get_coerce_value(args.coerce)
        if args.context == CONTEXT_NAME_SPARK:
            return self.execute_spark(cell, args.output, args.samplemethod,
                                      args.maxrows, args.samplefraction, session, coerce)
        elif args.context == CONTEXT_NAME_SQL:
            return self.execute_sqlquery(cell, args.samplemethod, args.maxrows, args.samplefraction,
                                         session, args.output, args.quiet, coerce)
        else:
            self.ipython_display.send_error("Context '{}' not found".format(args.context))
    # add
    elif subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error("Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, args.auth, args.user, args.password)
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
    # delete
    elif subcommand == "delete":
        if args.session is not None:
            self.spark_controller.delete_session_by_name(args.session)
        elif args.url is not None:
            if args.id is None:
                self.ipython_display.send_error("Must provide --id or -i option to delete session at endpoint from URL")
                return
            endpoint = Endpoint(args.url, args.auth, args.user, args.password)
            session_id = args.id
            self.spark_controller.delete_session_by_id(endpoint, session_id)
        else:
            self.ipython_display.send_error("Subcommand 'delete' requires a session name or a URL and session ID")
    # cleanup
    elif subcommand == "cleanup":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.auth, args.user, args.password)
            self.spark_controller.cleanup_endpoint(endpoint)
        else:
            self.spark_controller.cleanup()
    # logs
    elif subcommand == "logs":
        self.ipython_display.write(self.spark_controller.get_logs(args.session))
    # run
    elif len(subcommand) == 0:
        coerce = get_coerce_value(args.coerce)
        if args.context == CONTEXT_NAME_SPARK:
            return self.execute_spark(cell, args.output, args.samplemethod,
                                      args.maxrows, args.samplefraction, args.session, coerce)
        elif args.context == CONTEXT_NAME_SQL:
            return self.execute_sqlquery(cell, args.samplemethod, args.maxrows, args.samplefraction,
                                         args.session, args.output, args.quiet, coerce)
        else:
            self.ipython_display.send_error("Context '{}' not found".format(args.context))
    # error
    else:
        self.ipython_display.send_error("Subcommand '{}' not found. {}".format(subcommand, usage))

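# Typical notebook usage of the magic above, following the docstring examples;
# each command runs in its own cell, and the cell body under %%spark is hypothetical.
#
#   %spark add -s testsession -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p
#
#   %%spark -s testsession
#   df = spark.range(10)
#
#   %spark logs -s testsession
#   %spark delete -s testsession
#   %spark cleanup
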
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
    be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

    If invoked with no subcommand, the cell will be executed against the specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for sessions.
    add
        Add a Livy session given a session name (-s), language (-l), and endpoint credentials.
        The -k argument, if present, will skip adding this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation. All session creations will
        contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
        endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
        previously created and store the pandas dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments required.
        e.g. `%spark cleanup`
    """
    usage = "Please look at usage of %spark by executing `%spark?`."
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)
    subcommand = args.command[0].lower()

    # info
    if subcommand == "info":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.user, args.password)
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            self._print_endpoint_info(info_sessions)
        else:
            self._print_local_info()
    # config
    elif subcommand == "config":
        conf.override(conf.session_configs.__name__, json.loads(cell))
    # add
    elif subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error("Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, args.user, args.password)
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
    # delete
    elif subcommand == "delete":
        if args.session is not None:
            self.spark_controller.delete_session_by_name(args.session)
        elif args.url is not None:
            if args.id is None:
                self.ipython_display.send_error("Must provide --id or -i option to delete session at endpoint from URL")
                return
            endpoint = Endpoint(args.url, args.user, args.password)
            session_id = args.id
            self.spark_controller.delete_session_by_id(endpoint, session_id)
        else:
            self.ipython_display.send_error("Subcommand 'delete' requires a session name or a URL and session ID")
    # cleanup
    elif subcommand == "cleanup":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.user, args.password)
            self.spark_controller.cleanup_endpoint(endpoint)
        else:
            self.spark_controller.cleanup()
    # logs
    elif subcommand == "logs":
        self.ipython_display.write(self.spark_controller.get_logs(args.session))
    # run
    elif len(subcommand) == 0:
        if args.context == CONTEXT_NAME_SPARK:
            (success, out) = self.spark_controller.run_command(Command(cell), args.session)
            if success:
                self.ipython_display.write(out)
            else:
                self.ipython_display.send_error(out)
        elif args.context == CONTEXT_NAME_SQL:
            return self.execute_sqlquery(cell, args.samplemethod, args.maxrows, args.samplefraction,
                                         args.session, args.output, args.quiet)
        else:
            self.ipython_display.send_error("Context '{}' not found".format(args.context))
    # error
    else:
        self.ipython_display.send_error("Subcommand '{}' not found. {}".format(subcommand, usage))

def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
    be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

    If invoked with no subcommand, the cell will be executed against the specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for sessions with None, Basic, or
        Kerberos auth.
    add
        Add a Livy session given a session name (-s), language (-l), and endpoint credentials.
        The -k argument, if present, will skip adding this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation. All session creations will
        contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
        endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
        previously created and store the pandas dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments required.
        e.g. `%spark cleanup`
    """
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)
    subcommand = args.command[0].lower()

    if args.auth == "Google" and subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error(
                "Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, initialize_auth(args))
        self.endpoints[args.url] = endpoint
        # Convert the Endpoints in self.endpoints into a list of dictionaries, each storing an
        # Endpoint's writeable attributes.
        stored_endpoints = [SerializableEndpoint(endpoint).__dict__
                            for endpoint in self.endpoints.values()]
        self.db['autorestore/' + 'stored_endpoints'] = stored_endpoints
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
        # The session_id_to_name dict is necessary to restore session names across notebook
        # sessions, since the livy server does not store the name.
        session_id_to_name = get_session_id_to_name(self.db, self.ipython_display)
        # Add the session id -> name mapping to the session_id_to_name dict.
        session_id_to_name[self.spark_controller.session_manager.get_session(name).id] = name
        self.db['autorestore/' + 'session_id_to_name'] = session_id_to_name
    elif subcommand == "info":
        if args.url is not None and args.id is not None:
            endpoint = Endpoint(args.url, initialize_auth(args))
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            self._print_endpoint_info(info_sessions, args.id)
        else:
            self._print_local_info()
    else:
        self.__remotesparkmagics.spark(line, cell, local_ns=None)

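# A hedged sketch of the two autorestore records the add branch above persists;
# the SerializableEndpoint fields beyond url are assumptions, and the id/name
# values are illustrative.
stored_endpoints = [
    {"url": "https://example.com/livyendpoint", "auth": "Google"},  # SerializableEndpoint(...).__dict__
]
session_id_to_name = {7: "testsession"}  # livy session id -> user-facing name
# db['autorestore/stored_endpoints'] = stored_endpoints
# db['autorestore/session_id_to_name'] = session_id_to_name
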