def __init__(self, shell, data=None):
    # You must call the parent constructor
    super(SparkMagicBase, self).__init__(shell)

    self.logger = Log("SparkMagics")
    self.ipython_display = IpythonDisplay()
    self.spark_controller = SparkController(self.ipython_display)

    try:
        should_serialize = conf.serialize()
        if should_serialize:
            self.logger.debug("Serialization enabled.")

            self.magics_home_path = get_magics_home_path()
            path_to_serialize = join_paths(self.magics_home_path, "state.json")

            self.logger.debug(
                "Will serialize to {}.".format(path_to_serialize))

            self.spark_controller = SparkController(
                self.ipython_display, serialize_path=path_to_serialize)
        else:
            self.logger.debug("Serialization NOT enabled.")
    except KeyError:
        self.logger.error("Could not read env vars for serialization.")

    self.logger.debug("Initialized spark magics.")
def test_stderr_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stderr = MagicMock()

    ipython_display.send_error('Testing Stderr Flush')
    assert sys.stderr.flush.call_count == 1
def test_stdout_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stdout = MagicMock()

    ipython_display.write(u'Testing Stdout Flush è')
    assert sys.stdout.flush.call_count == 1
def test_stderr_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stderr = MagicMock()

    ipython_display.send_error(u'Testing Stderr Flush è')
    assert sys.stderr.flush.call_count == 1
def test_stdout_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stdout = MagicMock()

    ipython_display.write('Testing Stdout Flush')
    assert sys.stdout.flush.call_count == 1
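# The four flush tests above exercise IpythonDisplay.write and
# IpythonDisplay.send_error with both ASCII and non-ASCII payloads and assert
# that the patched stream is flushed exactly once. Below is a minimal sketch of
# the behavior those tests assume; the class name is hypothetical and the real
# IpythonDisplay implementation is not reproduced here.
import sys


class FlushingDisplay(object):
    """Hypothetical stand-in mirroring the contract the tests rely on."""

    def write(self, msg):
        # Regular output goes to stdout and is flushed immediately.
        sys.stdout.write(msg)
        sys.stdout.flush()

    def send_error(self, error):
        # Errors go to stderr, again flushed right away.
        sys.stderr.write(error)
        sys.stderr.flush()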
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            should_serialize = conf.serialize()
            if should_serialize:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                path_to_serialize = join_paths(self.magics_home_path, "state.json")

                self.logger.debug(
                    "Will serialize to {}.".format(path_to_serialize))

                self.spark_controller = SparkController(
                    self.ipython_display, serialize_path=path_to_serialize)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    def execute_sqlquery(self, sqlquery, session, output_var, quiet):
        try:
            df = self.spark_controller.run_cell_sql(sqlquery, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = df
            if quiet:
                return None
            else:
                return df
        except DataFrameParseException as e:
            self.ipython_display.send_error(e.out)
            return None

    @staticmethod
    def print_endpoint_info(info_sessions):
        sessions_info = [" {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
    Sessions:
{}
""".format("\n".join(sessions_info)))
class DataGraph(object):
    """This does not use the table version of plotly because it freezes up the
    browser for >60 rows. Instead, we use pandas df HTML representation."""

    def __init__(self, display=None):
        if display is None:
            self.display = IpythonDisplay()
        else:
            self.display = display

    def render(self, df, encoding, output):
        with output:
            max_rows = pd.get_option("display.max_rows")
            max_cols = pd.get_option("display.max_columns")
            show_dimensions = pd.get_option("display.show_dimensions")

            # This will hide the index column for pandas df.
            self.display.html("""
<style>
table.dataframe.hideme thead th:first-child {
    display: none;
}
table.dataframe.hideme tbody th {
    display: none;
}
</style>
""")
            self.display.html(
                df.to_html(max_rows=max_rows, max_cols=max_cols,
                           show_dimensions=show_dimensions, notebook=True,
                           classes="hideme"))

    @staticmethod
    def display_logarithmic_x_axis():
        return False

    @staticmethod
    def display_logarithmic_y_axis():
        return False

    @staticmethod
    def display_x():
        return False

    @staticmethod
    def display_y():
        return False
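# A small usage sketch for DataGraph.render. It assumes `output` can be any
# context manager that captures display calls (an ipywidgets Output widget in a
# notebook); the imports shown are assumptions, not taken from the snippets in
# this section. Note that the `encoding` argument is unused by this table
# renderer, so None is passed here.
import pandas as pd
from ipywidgets import Output

df = pd.DataFrame({"country": ["US", "CA"], "count": [10, 3]})
out = Output()

graph = DataGraph()          # falls back to IpythonDisplay() internally
graph.render(df, None, out)  # renders the dataframe as HTML into the widget
out                          # show the widget as the last expression of a notebook cell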
def __init__(self, reader_writer):
    assert reader_writer is not None

    self.logger = Log("ClientManagerStateSerializer")
    self._ipython_display = IpythonDisplay()
    self._reader_writer = reader_writer
def __init__(self, shell, data=None):
    # You must call the parent constructor
    super(SparkMagicBase, self).__init__(shell)

    self.logger = Log("SparkMagics")
    self.ipython_display = IpythonDisplay()
    self.spark_controller = SparkController(self.ipython_display)

    try:
        should_serialize = conf.serialize()
        if should_serialize:
            self.logger.debug("Serialization enabled.")

            self.magics_home_path = get_magics_home_path()
            path_to_serialize = join_paths(self.magics_home_path, "state.json")

            self.logger.debug("Will serialize to {}.".format(path_to_serialize))

            self.spark_controller = SparkController(self.ipython_display,
                                                    serialize_path=path_to_serialize)
        else:
            self.logger.debug("Serialization NOT enabled.")
    except KeyError:
        self.logger.error("Could not read env vars for serialization.")

    self.logger.debug("Initialized spark magics.")
def __init__(self, implementation, implementation_version, language, language_version, language_info,
             session_language, user_code_parser=None, **kwargs):
    # Required by Jupyter - Override
    self.implementation = implementation
    self.implementation_version = implementation_version
    self.language = language
    self.language_version = language_version
    self.language_info = language_info

    # Override
    self.session_language = session_language

    super(SparkKernelBase, self).__init__(**kwargs)

    self.logger = Log("{}_jupyter_kernel".format(self.session_language))
    self._fatal_error = None
    self.ipython_display = IpythonDisplay()

    if user_code_parser is None:
        self.user_code_parser = UserCodeParser()
    else:
        self.user_code_parser = user_code_parser

    # Disable warnings for test env in HDI
    requests.packages.urllib3.disable_warnings()

    if not kwargs.get("testing", False):
        self._load_magics_extension()
        self._change_language()
        if conf.use_auto_viz():
            self._register_auto_viz()
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            should_serialize = conf.serialize()
            if should_serialize:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                path_to_serialize = join_paths(self.magics_home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(path_to_serialize))

                self.spark_controller = SparkController(self.ipython_display,
                                                        serialize_path=path_to_serialize)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    def execute_sqlquery(self, sqlquery, session, output_var, quiet):
        try:
            df = self.spark_controller.run_cell_sql(sqlquery, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = df
            if quiet:
                return None
            else:
                return df
        except DataFrameParseException as e:
            self.ipython_display.send_error(e.out)
            return None

    @staticmethod
    def print_endpoint_info(info_sessions):
        sessions_info = [" {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
    Sessions:
{}
""".format("\n".join(sessions_info)))
def __init__(self, df, encoding, renderer=None, ipywidget_factory=None, encoding_widget=None,
             ipython_display=None, nested_widget_mode=False, testing=False, **kwargs):
    assert encoding is not None
    assert df is not None
    assert type(df) is pd.DataFrame
    assert len(df.columns) > 0

    kwargs['orientation'] = 'vertical'

    if not testing:
        super(AutoVizWidget, self).__init__((), **kwargs)

    if renderer is None:
        renderer = GraphRenderer()
    self.renderer = renderer

    if ipywidget_factory is None:
        ipywidget_factory = IpyWidgetFactory()
    self.ipywidget_factory = ipywidget_factory

    if encoding_widget is None:
        encoding_widget = EncodingWidget(df, encoding, self.on_render_viz)
    self.encoding_widget = encoding_widget

    if ipython_display is None:
        ipython_display = IpythonDisplay()
    self.ipython_display = ipython_display

    self.df = df
    self.encoding = encoding

    # Widget that will become the only child of AutoVizWidget
    self.widget = self.ipywidget_factory.get_vbox()

    # Create output area
    self.to_display = self.ipywidget_factory.get_output()
    self.to_display.width = "800px"
    self.output = self.ipywidget_factory.get_hbox()
    self.output.children = [self.to_display]

    self.controls = self._create_controls_widget()

    if nested_widget_mode:
        self.widget.children = [self.controls, self.output]
        self.children = [self.widget]
    else:
        self.ipython_display.display(self.controls)
        self.ipython_display.display(self.to_display)

    self.on_render_viz()
class DataGraph(object):
    """This does not use the table version of plotly because it freezes up the
    browser for >60 rows. Instead, we use pandas df HTML representation."""

    def __init__(self, display=None):
        if display is None:
            self.display = IpythonDisplay()
        else:
            self.display = display

    def render(self, df, encoding, output):
        with output:
            max_rows = pd.get_option("display.max_rows")
            max_cols = pd.get_option("display.max_columns")
            show_dimensions = pd.get_option("display.show_dimensions")

            # This will hide the index column for pandas df.
            self.display.html("""
<style>
table.dataframe.hideme thead th:first-child {
    display: none;
}
table.dataframe.hideme tbody th {
    display: none;
}
</style>
""")
            self.display.html(df.to_html(max_rows=max_rows, max_cols=max_cols,
                                         show_dimensions=show_dimensions, notebook=True,
                                         classes="hideme"))

    @staticmethod
    def display_logarithmic_x_axis():
        return False

    @staticmethod
    def display_logarithmic_y_axis():
        return False

    @staticmethod
    def display_x():
        return False

    @staticmethod
    def display_y():
        return False
def __init__(self, shell, data=None, spark_events=None):
    # You must call the parent constructor
    super(SparkMagicBase, self).__init__(shell)

    self.logger = Log("SparkMagics")
    self.ipython_display = IpythonDisplay()
    self.spark_controller = SparkController(self.ipython_display)

    self.logger.debug("Initialized spark magics.")

    if spark_events is None:
        spark_events = SparkEvents()
    spark_events.emit_library_loaded_event()
def __init__(self, spark_controller, ipywidget_factory=None, ipython_display=None,
             nested_widget_mode=False, testing=False, **kwargs):
    kwargs['orientation'] = 'vertical'

    if not testing:
        super(AbstractMenuWidget, self).__init__((), **kwargs)

    self.spark_controller = spark_controller

    if ipywidget_factory is None:
        ipywidget_factory = IpyWidgetFactory()
    self.ipywidget_factory = ipywidget_factory

    if ipython_display is None:
        ipython_display = IpythonDisplay()
    self.ipython_display = ipython_display

    self.children = []

    if not nested_widget_mode:
        self._repr_html_()
def __init__(self, implementation, implementation_version, language, language_version, language_info,
             kernel_conf_name, session_language, client_name, **kwargs):
    # Required by Jupyter - Override
    self.implementation = implementation
    self.implementation_version = implementation_version
    self.language = language
    self.language_version = language_version
    self.language_info = language_info

    # Override
    self.kernel_conf_name = kernel_conf_name
    self.session_language = session_language
    self.client_name = client_name

    super(SparkKernelBase, self).__init__(**kwargs)

    self._logger = Log(self.client_name)
    self._session_started = False
    self._fatal_error = None
    self._ipython_display = IpythonDisplay()

    self.user_command_parser = UserCommandParser()

    # Disable warnings for test env in HDI
    requests.packages.urllib3.disable_warnings()

    if not kwargs.get("testing", False):
        configuration = self._get_configuration()
        if not configuration:
            # _get_configuration() sets the error for us so we can just return now.
            # The kernel is not in a good state and all do_execute calls will
            # fail with the fatal error.
            return
        (username, password, url) = configuration
        self.connection_string = get_connection_string(url, username, password)

        self._load_magics_extension()

        if conf.use_auto_viz():
            self._register_auto_viz()
def __init__(self, display=None):
    if display is None:
        self.display = IpythonDisplay()
    else:
        self.display = display
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext remotespark.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions,
                                                       allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it returns
        when there's some sort of error preventing the user's cell from executing; this will
        register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
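# A minimal sketch of how a concrete kernel might subclass the SparkKernelBase
# defined above. The class name, version strings, language_info contents and
# the launch snippet are illustrative assumptions, not taken from the snippets
# in this section.
class PySparkKernelSketch(SparkKernelBase):
    def __init__(self, **kwargs):
        implementation = 'PySpark'
        implementation_version = '0.1'
        language = 'python'
        language_version = '0.1'
        language_info = {
            'name': 'pyspark',
            'mimetype': 'text/x-python',
        }
        session_language = 'python'

        super(PySparkKernelSketch, self).__init__(implementation, implementation_version,
                                                  language, language_version, language_info,
                                                  session_language, **kwargs)


if __name__ == '__main__':
    # Launch through ipykernel's standard entry point (assumed installation).
    from ipykernel.kernelapp import IPKernelApp
    IPKernelApp.launch_instance(kernel_class=PySparkKernelSketch)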
class RemoteSparkMagics(Magics):
    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(RemoteSparkMagics, self).__init__(shell)

        self.logger = Log("RemoteSparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            should_serialize = conf.serialize()
            if should_serialize:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                path_to_serialize = join_paths(self.magics_home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(path_to_serialize))

                self.spark_controller = SparkController(self.ipython_display,
                                                        serialize_path=path_to_serialize)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    @magic_arguments()
    @argument("-c", "--context", type=str, default=Constants.context_name_spark,
              help="Context to use: '{}' for spark, '{}' for sql queries, and '{}' for hive queries. "
                   "Default is '{}'.".format(Constants.context_name_spark, Constants.context_name_sql,
                                             Constants.context_name_hive, Constants.context_name_spark))
    @argument("-s", "--session", help="The name of the Livy session to use. "
                                      "If only one session has been created, there's no need to specify one.")
    @argument("-o", "--output", type=str, default=None, help="If present, output when using SQL or Hive "
                                                             "query will be stored in variable of this name.")
    @argument("command", type=str, default=[""], nargs="*", help="Commands to execute.")
    @needs_local_scope
    @line_cell_magic
    def spark(self, line, cell="", local_ns=None):
        """Magic to execute spark remotely.

           This magic allows you to create a Livy Scala or Python session against a Livy endpoint.
           Every session can be used to execute either Spark code or SparkSQL code by executing
           against the SQL context in the session. When the SQL context is used, the result will
           be a Pandas dataframe of a sample of the results.

           If invoked with no subcommand, the cell will be executed against the specified session.

           Subcommands
           -----------
           info
               Display the available Livy sessions and other configurations for sessions.
           add
               Add a Livy session. First argument is the name of the session, second argument is
               the language, and third argument is the connection string of the Livy endpoint.
               A fourth argument specifying if session creation can be skipped if it already exists
               is optional: "skip" or empty.
               e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p skip`
               or
               e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p`
           config
               Override the livy session properties sent to Livy on session creation. All session
               creations will contain these config settings from then on.
               Expected value is a JSON key-value string to be sent as part of the Request Body
               for the POST /sessions endpoint in Livy.
               e.g. `%%spark config {"driverMemory":"1000M", "executorCores":4}`
           run
               Run Spark code against a session.
               e.g. `%%spark -s testsession` will execute the cell code against the testsession
                    previously created
               e.g. `%%spark -s testsession -c sql` will execute the SQL code against the
                    testsession previously created
               e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against
                    the testsession previously created and store the pandas dataframe created in
                    the my_var variable in the Python environment.
           logs
               Returns the logs for a given session.
               e.g. `%%spark logs -s testsession` will return the logs for the testsession
                    previously created
           delete
               Delete a Livy session. Argument is the name of the session to be deleted.
               e.g. `%%spark delete defaultlivy`
           cleanup
               Delete all Livy sessions created by the notebook. No arguments required.
               e.g. `%%spark cleanup`
        """
        usage = "Please look at usage of %spark by executing `%spark?`."
        user_input = line
        args = parse_argstring(self.spark, user_input)

        subcommand = args.command[0].lower()

        try:
            # info
            if subcommand == "info":
                if len(args.command) == 2:
                    connection_string = args.command[1]
                    info_sessions = self.spark_controller.get_all_sessions_endpoint_info(connection_string)
                    self._print_endpoint_info(info_sessions)
                elif len(args.command) == 1:
                    self._print_local_info()
                else:
                    raise ValueError("Subcommand 'info' requires no value or a connection string to show all "
                                     "sessions.\n{}".format(usage))
            # config
            elif subcommand == "config":
                # Would normally do " ".join(args.command[1:]) but parse_argstring removes quotes...
                rest_of_line = user_input[7:]
                conf.override(conf.session_configs.__name__, json.loads(rest_of_line))
            # add
            elif subcommand == "add":
                if len(args.command) != 4 and len(args.command) != 5:
                    raise ValueError("Subcommand 'add' requires three or four arguments.\n{}".format(usage))

                name = args.command[1].lower()
                language = args.command[2].lower()
                connection_string = args.command[3]

                if len(args.command) == 5:
                    skip = args.command[4].lower() == "skip"
                else:
                    skip = False

                properties = copy.deepcopy(conf.session_configs())
                properties["kind"] = self._get_livy_kind(language)

                self.spark_controller.add_session(name, connection_string, skip, properties)
            # delete
            elif subcommand == "delete":
                if len(args.command) == 2:
                    name = args.command[1].lower()
                    self.spark_controller.delete_session_by_name(name)
                elif len(args.command) == 3:
                    connection_string = args.command[1]
                    session_id = args.command[2]
                    self.spark_controller.delete_session_by_id(connection_string, session_id)
                else:
                    raise ValueError("Subcommand 'delete' requires a session name or a connection string and id.\n{}"
                                     .format(usage))
            # cleanup
            elif subcommand == "cleanup":
                if len(args.command) == 2:
                    connection_string = args.command[1]
                    self.spark_controller.cleanup_endpoint(connection_string)
                elif len(args.command) == 1:
                    self.spark_controller.cleanup()
                else:
                    raise ValueError("Subcommand 'cleanup' requires no further values or a connection string to "
                                     "clean up sessions.\n{}".format(usage))
            # logs
            elif subcommand == "logs":
                if len(args.command) == 1:
                    (success, out) = self.spark_controller.get_logs(args.session)
                    if success:
                        self.ipython_display.write(out)
                    else:
                        self.ipython_display.send_error(out)
                else:
                    raise ValueError("Subcommand 'logs' requires no further values.\n{}".format(usage))
            # run
            elif len(subcommand) == 0:
                if args.context == Constants.context_name_spark:
                    (success, out) = self.spark_controller.run_cell(cell, args.session)
                    if success:
                        self.ipython_display.write(out)
                    else:
                        self.ipython_display.send_error(out)
                elif args.context == Constants.context_name_sql:
                    return self._execute_against_context_that_returns_df(self.spark_controller.run_cell_sql,
                                                                         cell, args.session, args.output)
                elif args.context == Constants.context_name_hive:
                    return self._execute_against_context_that_returns_df(self.spark_controller.run_cell_hive,
                                                                         cell, args.session, args.output)
                else:
                    raise ValueError("Context '{}' not found".format(args.context))
            # error
            else:
                raise ValueError("Subcommand '{}' not found. {}".format(subcommand, usage))
        except ValueError as err:
            self.ipython_display.send_error("{}".format(err))

    def _execute_against_context_that_returns_df(self, method, cell, session, output_var):
        try:
            df = method(cell, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = df
            return df
        except DataFrameParseException as e:
            self.ipython_display.send_error(e.out)
            return None

    def _print_local_info(self):
        sessions_info = [" {}".format(i) for i in self.spark_controller.get_manager_sessions_str()]
        print("""Info for running Spark:
    Sessions:
{}
    Session configs:
        {}
""".format("\n".join(sessions_info), conf.session_configs()))

    def _print_endpoint_info(self, info_sessions):
        sessions_info = [" {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
    Sessions:
{}
""".format("\n".join(sessions_info)))

    @staticmethod
    def _get_livy_kind(language):
        if language == Constants.lang_scala:
            return Constants.session_kind_spark
        elif language == Constants.lang_python:
            return Constants.session_kind_pyspark
        elif language == Constants.lang_r:
            return Constants.session_kind_sparkr
        else:
            raise ValueError("Cannot get session kind for {}.".format(language))
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext remotespark.kernels"
        self._execute_cell(
            register_magics_code, True, False, shutdown_if_error=True,
            log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(
            self.session_language)
        self._execute_cell(
            register_magics_code, True, False, shutdown_if_error=True,
            log_if_error="Failed to change language to {}.".format(
                self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(
            register_auto_viz_code, True, False, shutdown_if_error=True,
            log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(
                    log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions,
                                                       allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it returns
        when there's some sort of error preventing the user's cell from executing; this will
        register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 kernel_conf_name, session_language, client_name, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.kernel_conf_name = kernel_conf_name
        self.session_language = session_language
        self.client_name = client_name

        super(SparkKernelBase, self).__init__(**kwargs)

        self._logger = Log(self.client_name)
        self._session_started = False
        self._fatal_error = None
        self._ipython_display = IpythonDisplay()

        self.user_command_parser = UserCommandParser()

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            configuration = self._get_configuration()
            if not configuration:
                # _get_configuration() sets the error for us so we can just return now.
                # The kernel is not in a good state and all do_execute calls will
                # fail with the fatal error.
                return
            (username, password, url) = configuration
            self.connection_string = get_connection_string(url, username, password)

            self._load_magics_extension()

            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        if self._fatal_error is not None:
            self._repeat_fatal_error()

        # Parse command
        subcommand, force, output_var, command = self.user_command_parser.parse_user_command(code)

        # Get transformer
        transformer = self._get_code_transformer(subcommand)

        # Get instructions
        try:
            code_to_run, error_to_show, begin_action, end_action, deletes_session = \
                transformer.get_code_to_execute(self._session_started, self.connection_string,
                                                force, output_var, command)
        except SyntaxError as se:
            self._show_user_error("{}".format(se))
        else:
            # Execute instructions
            if error_to_show is not None:
                self._show_user_error(error_to_show)
                return self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

            if begin_action == Constants.delete_session_action:
                self._delete_session()
            elif begin_action == Constants.start_session_action:
                self._start_session()
            elif begin_action == Constants.do_nothing_action:
                pass
            else:
                raise ValueError("Begin action {} not supported.".format(begin_action))

            res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

            if end_action == Constants.delete_session_action:
                self._delete_session()
            elif end_action == Constants.start_session_action:
                self._start_session()
            elif end_action == Constants.do_nothing_action:
                pass
            else:
                raise ValueError("End action {} not supported.".format(end_action))

            if deletes_session:
                self._session_started = False

            return res

        return self._execute_cell("", silent, store_history, user_expressions, allow_stdin)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    @staticmethod
    def _get_code_transformer(subcommand):
        if subcommand == UserCommandParser.run_command:
            return SparkTransformer(subcommand)
        elif subcommand == UserCommandParser.sql_command:
            return SqlTransformer(subcommand)
        elif subcommand == UserCommandParser.hive_command:
            return HiveTransformer(subcommand)
        elif subcommand == UserCommandParser.config_command:
            return ConfigTransformer(subcommand)
        elif subcommand == UserCommandParser.info_command:
            return InfoTransformer(subcommand)
        elif subcommand == UserCommandParser.delete_command:
            return DeleteSessionTransformer(subcommand)
        elif subcommand == UserCommandParser.clean_up_command:
            return CleanUpTransformer(subcommand)
        elif subcommand == UserCommandParser.logs_command:
            return LogsTransformer(subcommand)
        elif subcommand == UserCommandParser.local_command:
            return PythonTransformer(subcommand)
        else:
            return NotSupportedTransformer(subcommand)

    def _load_magics_extension(self):
        register_magics_code = "%load_ext remotespark"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark magics library.")
        self._logger.debug("Loaded magics.")

    def _register_auto_viz(self):
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self._logger.debug("Registered auto viz.")

    def _start_session(self):
        if not self._session_started:
            self._session_started = True

            add_session_code = "%spark add {} {} {} skip".format(
                self.client_name, self.session_language, self.connection_string)
            self._execute_cell(add_session_code, True, False, shutdown_if_error=True,
                               log_if_error="Failed to create a Livy session.")
            self._logger.debug("Added session.")

    def _delete_session(self):
        if self._session_started:
            code = "%spark cleanup"
            self._execute_cell_for_user(code, True, False)
            self._session_started = False

    def _get_configuration(self):
        """Returns (username, password, url). If there is an error (missing configuration),
        returns False."""
        try:
            credentials = getattr(conf, 'kernel_' + self.kernel_conf_name + '_credentials')()
            ret = (credentials['username'], credentials['password'], credentials['url'])
            # The URL has to be set in the configuration.
            assert(ret[2])
            return ret
        except (KeyError, AssertionError):
            message = "Please set configuration for 'kernel_{}_credentials' to initialize Kernel".format(
                self.kernel_conf_name)
            self._queue_fatal_error(message)
            return False

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _show_user_error(self, message):
        self._logger.error(message)
        self._ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self._logger.error(error)
        self._ipython_display.send_error(error)
        raise ValueError(self._fatal_error)
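# _get_configuration above expects a callable on `conf` named
# 'kernel_<kernel_conf_name>_credentials' that returns a dict with 'username',
# 'password', and a non-empty 'url'. The sketch below shows what that
# configuration might look like, assuming a kernel_conf_name of "python" and
# reusing the conf.override call seen in the %spark config subcommand; the
# exact override mechanism for credentials is an assumption here.
conf.override("kernel_python_credentials", {
    "username": "user",
    "password": "pass",
    "url": "https://sparkcluster.net/livy",  # must be non-empty or the kernel queues a fatal error
})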