def __init__(self, implementation, implementation_version, language,
             language_version, language_info, session_language,
             user_code_parser=None, **kwargs):
    """Initialize the Spark wrapper kernel.

    Stores the kernel metadata Jupyter requires, wires up logging and the
    IPython display helper and, outside of test mode, loads the magics
    extension, switches the session language and registers auto-viz.
    """
    # Metadata attributes required by the Jupyter kernel protocol.
    self.implementation = implementation
    self.implementation_version = implementation_version
    self.language = language
    self.language_version = language_version
    self.language_info = language_info

    # Language used for the remote Spark session (e.g. python / scala).
    self.session_language = session_language

    super(SparkKernelBase, self).__init__(**kwargs)

    self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
    self._fatal_error = None
    self.ipython_display = IpythonDisplay()

    # Fall back to the default parser when the caller supplies none.
    self.user_code_parser = (UserCodeParser()
                             if user_code_parser is None else user_code_parser)

    # Disable warnings for test env in HDI
    requests.packages.urllib3.disable_warnings()

    if not kwargs.get("testing", False):
        self._load_magics_extension()
        self._change_language()
        if conf.use_auto_viz():
            self._register_auto_viz()
def test_stderr_flush():
    """send_error must flush stderr exactly once (including non-ASCII text).

    Fix: the original replaced the global ``sys.stderr`` with a MagicMock
    and never restored it, leaking the mock into every later test; the
    replacement is now scoped with try/finally.
    """
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    saved_stderr = sys.stderr
    sys.stderr = MagicMock()
    try:
        ipython_display.send_error(u'Testing Stderr Flush è')
        assert sys.stderr.flush.call_count == 1
    finally:
        # Always restore the real stream, even when the assertion fails.
        sys.stderr = saved_stderr
def test_stdout_flush():
    """write must flush stdout exactly once (including non-ASCII text).

    Fix: the original replaced the global ``sys.stdout`` with a MagicMock
    and never restored it, leaking the mock into every later test; the
    replacement is now scoped with try/finally.
    """
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    saved_stdout = sys.stdout
    sys.stdout = MagicMock()
    try:
        ipython_display.write(u'Testing Stdout Flush è')
        assert sys.stdout.flush.call_count == 1
    finally:
        # Always restore the real stream, even when the assertion fails.
        sys.stdout = saved_stdout
def __init__(self, shell, data=None, widget=None):
    """Set up the Linkis magic: display helper, optional data, client.

    ``widget`` is accepted for interface compatibility but is currently
    unused by this implementation.
    """
    super(LinkisMagic, self).__init__(shell)
    self.ipython_display = IpythonDisplay()
    self.data = data
    self.linkis_client = LinkisClient()
class SparkMagicBase(Magics):
    """Base class for Spark magics.

    Wires up logging, display and the Spark controller, and provides SQL
    execution plus an HTML rendering of endpoint session information.
    """

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        # Fall back to a fresh event emitter when none was injected.
        spark_events = SparkEvents() if spark_events is None else spark_events
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        """Run the SQL in ``cell``; bind the resulting dataframe to
        ``output_var`` when given and return it unless ``quiet``."""
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        return None if quiet else df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        # Thin factory kept separate so query construction can be stubbed.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table describing every session on the endpoint."""
        if not info_sessions:
            self.ipython_display.html(u'No active sessions.')
            return
        ordered = sorted(info_sessions, key=lambda s: s.id)
        header = (u"<table>\n"
                  u"<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th>"
                  u"<th>State</th><th>Spark UI</th><th>Driver log</th>"
                  u"<th>Current session?</th></tr>")
        rows = u"".join(SparkMagicBase._session_row_html(s, current_session_id)
                        for s in ordered)
        self.ipython_display.html(header + rows + u"</table>")

    @staticmethod
    def _session_row_html(session, current_session_id):
        # Check-mark only on the row matching the currently active session.
        tick = (u"✔" if current_session_id is not None
                and current_session_id == session.id else u"")
        return (u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td>"""
                u"""<td>{4}</td><td>{5}</td><td>{6}</td></tr>""").format(
            session.id, session.get_app_id(), session.kind, session.status,
            SparkMagicBase._link(u'Link', session.get_spark_ui_url()),
            SparkMagicBase._link(u'Link', session.get_driver_log_url()),
            tick)

    @staticmethod
    def _link(text, url):
        """Return an anchor tag for ``url``, or an empty string when absent."""
        if url is None:
            return u""
        return u"""<a target="_blank" href="{1}">{0}</a>""".format(text, url)
class SparkMagicBase(Magics):
    """Shared base for Spark magics: logger, display, controller, events,
    SQL execution and an HTML session-information table."""

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        if spark_events is None:
            spark_events = SparkEvents()
        # Emit once so telemetry records that the library was loaded.
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        """Execute ``cell`` as SQL on ``session``.

        The dataframe is stored in the user namespace under ``output_var``
        when one is supplied; ``quiet`` suppresses the return value.
        """
        query = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        result_df = self.spark_controller.run_sqlquery(query, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = result_df
        if quiet:
            return None
        return result_df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        # Factory hook: lets tests substitute the SQLQuery construction.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Emit an HTML table of sessions, or a placeholder when empty."""
        if info_sessions:
            by_id = sorted(info_sessions, key=lambda s: s.id)
            row_fragments = [
                SparkMagicBase._session_row_html(s, current_session_id)
                for s in by_id
            ]
            html = (u"<table>\n"
                    u"<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th>"
                    u"<th>State</th><th>Spark UI</th><th>Driver log</th>"
                    u"<th>Current session?</th></tr>"
                    + u"".join(row_fragments)
                    + u"</table>")
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')

    @staticmethod
    def _session_row_html(session, current_session_id):
        is_current = (current_session_id is not None
                      and current_session_id == session.id)
        cells = (
            session.id,
            session.get_app_id(),
            session.kind,
            session.status,
            SparkMagicBase._link(u'Link', session.get_spark_ui_url()),
            SparkMagicBase._link(u'Link', session.get_driver_log_url()),
            u"✔" if is_current else u"",
        )
        return (u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td>"""
                u"""<td>{4}</td><td>{5}</td><td>{6}</td></tr>""").format(*cells)

    @staticmethod
    def _link(text, url):
        """Anchor tag for ``url`` (empty string when there is no URL)."""
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(text, url)
        return u""
def __init__(self, shell, data=None, spark_events=None):
    """Initialize logging, display, the Spark controller and telemetry."""
    # You must call the parent constructor
    super(SparkMagicBase, self).__init__(shell)
    self.logger = SparkLog(u"SparkMagics")
    self.ipython_display = IpythonDisplay()
    self.spark_controller = SparkController(self.ipython_display)
    self.logger.debug("Initialized spark magics.")
    # Default to a fresh emitter when the caller did not inject one.
    spark_events = SparkEvents() if spark_events is None else spark_events
    spark_events.emit_library_loaded_event()
class SparkMagicBase(Magics):
    """Base class for Spark magics with Scala/Python execution and SQL.

    Fix: ``execute_spark`` previously bound the raw ``(success, out)``
    tuple returned by ``run_command`` into the user namespace instead of
    the dataframe — the first ``run_command`` call in the same method
    demonstrably returns a 2-tuple.  The store-command result is now
    unpacked and failures are surfaced to the user.
    """

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name):
        """Run ``cell`` on the named session; optionally store a sampled
        dataframe of ``output_var`` in the user namespace."""
        (success, out) = self.spark_controller.run_command(Command(cell),
                                                          session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction)
                # BUG FIX: unpack the (success, result) tuple instead of
                # binding the whole tuple to the output variable.
                (success, df) = self.spark_controller.run_command(
                    spark_store_command, session_name)
                if success:
                    self.shell.user_ns[output_var] = df
                else:
                    self.ipython_display.send_error(df)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows,
                             samplefraction):
        # Factory hook so tests can stub the store-command construction.
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        """Run ``cell`` as SQL; bind and/or return the dataframe."""
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        return None if quiet else df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render session info as HTML, or a placeholder when empty."""
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
class DataGraph(object):
    """Render a dataframe as pandas' own HTML table.

    The plotly table renderer is deliberately avoided because it freezes
    the browser for more than ~60 rows; the pandas HTML representation
    scales much better.
    """

    def __init__(self, display=None):
        # Fall back to an IPython display helper when none is injected.
        self.display = IpythonDisplay() if display is None else display

    def render(self, df, encoding, output):
        """Write ``df`` as an HTML table into the ``output`` widget."""
        with output:
            max_rows = pd.get_option("display.max_rows")
            max_cols = pd.get_option("display.max_columns")
            show_dimensions = pd.get_option("display.show_dimensions")

            # CSS that hides the pandas index column of the emitted table.
            self.display.html("""
<style>
  table.dataframe.hideme thead th:first-child {
    display: none;
  }
  table.dataframe.hideme tbody th {
    display: none;
  }
</style>
""")
            self.display.html(df.to_html(max_rows=max_rows,
                                         max_cols=max_cols,
                                         show_dimensions=show_dimensions,
                                         notebook=True,
                                         classes="hideme"))

    @staticmethod
    def display_logarithmic_x_axis():
        # Axis controls are meaningless for a plain table rendering.
        return False

    @staticmethod
    def display_logarithmic_y_axis():
        return False

    @staticmethod
    def display_x():
        return False

    @staticmethod
    def display_y():
        return False
def __init__(self, df, encoding, renderer=None, ipywidget_factory=None,
             encoding_widget=None, ipython_display=None,
             nested_widget_mode=False, spark_events=None, testing=False,
             **kwargs):
    """Build the auto-visualization widget around ``df``.

    Collaborators (renderer, widget factory, encoding widget, display,
    events) default to fresh instances when not injected, which keeps
    the constructor easy to test.
    """
    assert encoding is not None
    assert df is not None
    assert type(df) is pd.DataFrame

    kwargs['orientation'] = 'vertical'
    if not testing:
        super(AutoVizWidget, self).__init__((), **kwargs)

    # Must be set before the encoding widget below, which reads self.df.
    self.df = self._convert_to_displayable_dataframe(df)

    self.renderer = GraphRenderer() if renderer is None else renderer
    self.ipywidget_factory = (IpyWidgetFactory()
                              if ipywidget_factory is None
                              else ipywidget_factory)
    self.encoding_widget = (EncodingWidget(self.df, encoding,
                                           self.on_render_viz)
                            if encoding_widget is None else encoding_widget)
    self.ipython_display = (IpythonDisplay()
                            if ipython_display is None else ipython_display)
    self.encoding = encoding

    # Widget that will become the only child of AutoVizWidget
    self.widget = self.ipywidget_factory.get_vbox()

    # Output area that receives the rendered visualization.
    self.to_display = self.ipywidget_factory.get_output()
    self.to_display.width = "800px"
    self.output = self.ipywidget_factory.get_hbox()
    self.output.children = [self.to_display]

    self.controls = self._create_controls_widget()

    self._spark_events = (AutoVizEvents()
                          if spark_events is None else spark_events)

    if nested_widget_mode:
        # Embed controls + output inside this widget's own tree.
        self.widget.children = [self.controls, self.output]
        self.children = [self.widget]
    else:
        # Standalone mode: display the pieces directly.
        self.ipython_display.display(self.controls)
        self.ipython_display.display(self.to_display)

    self.on_render_viz()
def __init__(self, implementation, implementation_version, language,
             language_version, language_info, **kwargs):
    """Store Jupyter-required kernel metadata and set up HDFS helpers."""
    # Metadata fields the Jupyter kernel machinery expects to find.
    self.implementation = implementation
    self.implementation_version = implementation_version
    self.language = language
    self.language_version = language_version
    self.language_info = language_info

    super(HdfsKernelBase, self).__init__(**kwargs)

    self._fatal_error = None
    self.ipython_display = IpythonDisplay()
    self.session_manager = HdfsSessionManager()
class SparkMagicBase(Magics):
    """Base for Spark magics: SQL execution and session-info rendering.

    Session HTML generation is delegated to ``get_sessions_info_html``.
    """

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        # Default telemetry emitter when none is injected.
        spark_events = SparkEvents() if spark_events is None else spark_events
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        """Run ``cell`` as SQL on ``session``; optionally bind/return df."""
        query = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        result_df = self.spark_controller.run_sqlquery(query, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = result_df
        return None if quiet else result_df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        # Factory hook for stubbing SQLQuery construction in tests.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Emit an HTML table of sessions sorted by id, or a placeholder."""
        if not info_sessions:
            self.ipython_display.html(u'No active sessions.')
            return
        ordered = sorted(info_sessions, key=lambda s: s.id)
        self.ipython_display.html(
            get_sessions_info_html(ordered, current_session_id))
def __init__(self, spark_controller, ipywidget_factory=None,
             ipython_display=None, nested_widget_mode=False, testing=False,
             **kwargs):
    """Menu-widget base: wires factories/display, renders unless nested."""
    kwargs['orientation'] = 'vertical'
    if not testing:
        super(AbstractMenuWidget, self).__init__((), **kwargs)

    self.spark_controller = spark_controller
    # Default collaborators when not injected (eases unit testing).
    self.ipywidget_factory = (IpyWidgetFactory()
                              if ipywidget_factory is None
                              else ipywidget_factory)
    self.ipython_display = (IpythonDisplay()
                            if ipython_display is None else ipython_display)

    self.children = []

    if not nested_widget_mode:
        # Standalone mode renders the widget immediately.
        self._repr_html_()
class SparkKernelBase(IPythonKernel):
    """Base class for Spark wrapper kernels.

    Subclasses provide the kernel metadata; this class routes executed
    cells through the sparkmagic magics machinery, switches the session
    language at startup, and funnels fatal startup errors into the next
    executed cell (IPython crashes if ``__init__`` raises directly).
    """

    def __init__(self, implementation, implementation_version, language,
                 language_version, language_info, session_language,
                 user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        # Default parser when the caller supplies none.
        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        # In test mode the magics/language/auto-viz setup is skipped.
        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True,
                   user_expressions=None, allow_stdin=False):
        # Wrap execution so unexpected exceptions complete the cell
        # cleanly; a queued fatal error preempts the user's code.
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()
            return self._do_execute(code, silent, store_history,
                                    user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()
        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions,
                    allow_stdin):
        # Let the user-code parser rewrite the cell before execution.
        code_to_run = self.user_code_parser.get_code_to_run(code)
        res = self._execute_cell(code_to_run, silent, store_history,
                                 user_expressions, allow_stdin)
        return res

    def _load_magics_extension(self):
        # Load the sparkmagic magics; a failure here is fatal.
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        # Switch the remote session language via a hidden cell magic.
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        # Hook the autovizwidget formatter so DataFrames auto-render.
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)
        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        # Best-effort session teardown; errors are not escalated.
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True,
                      user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        # Execute a cell, optionally converting an error reply into a
        # fatal kernel error with the supplied log message.
        reply_content = self._execute_cell_for_user(code, silent,
                                                    store_history,
                                                    user_expressions,
                                                    allow_stdin)
        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)
        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True,
                               user_expressions=None, allow_stdin=False):
        # Delegate to the stock IPython kernel execution.
        return super(SparkKernelBase, self).do_execute(code, silent,
                                                       store_history,
                                                       user_expressions,
                                                       allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect.

        Call this and return the value it returns when there's some sort
        of error preventing the user's cell from executing; this will
        register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        # Log and surface a (non-fatal) error to the notebook user.
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is
        executed; does not raise an error immediately. We use this for
        errors that happen on kernel startup, since IPython crashes if we
        throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
class LinkisMagic(Magics):
    """IPython line/cell magics that submit Spark, PySpark and SQL jobs
    through a Linkis gateway and manage the resulting jobs and engines.

    Fixes relative to the previous revision:
      * ``progress``, ``log`` and ``status`` parsed their arguments
        against ``self.kill`` (copy-paste bug); each now parses against
        its own method.
      * ``sql`` dropped the user-supplied download path when calling
        ``download_by_pipeline_engine``; the path is now forwarded.
      * ``flashcookies`` parsed against ``self.listengine``.
    """

    def __init__(self, shell, data=None, widget=None):
        """Create the magic.

        ``widget`` is accepted for backward compatibility but unused.
        """
        super(LinkisMagic, self).__init__(shell)
        self.ipython_display = IpythonDisplay()
        self.data = data
        self.linkis_client = LinkisClient()

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    def spark(self, line, cell="", local_ns=None):
        """Run ``cell`` as Scala Spark code on Linkis."""
        user_input = parse_argstring(self.spark, line)
        code = cell
        status, exec_id, task_id = self.linkis_client.execute("spark", code,
                                                              run_type="spark")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    # Show the condensed and the full job logs.
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException: get_execute_result error")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    @argument("-v", "--var", type=str, default=None, help="transport var from spark cluster to local python ")
    @argument("-u", "--upload", type=str, default=None, help="transport var from local python to spark cluster")
    def pyspark(self, line, cell="", local_ns=None):
        """Run ``cell`` as PySpark, optionally shipping variables both ways."""
        user_input = parse_argstring(self.pyspark, line)
        pyspark_code = cell
        if user_input.upload is not None:
            # Prepend code that loads the uploaded pickle on the cluster.
            pyspark_code = self.linkis_client.pyspark_load_pickle_code(
                user_input.upload) + pyspark_code
            self.linkis_client.save_pickle_file(
                user_input.upload, self.shell.user_ns[user_input.upload])
        if user_input.var is not None:
            # Append code that pickles the requested remote variable.
            pyspark_code = pyspark_code + self.linkis_client.define_pickel_code(
                user_input.var)
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", pyspark_code, run_type="python")
        if not status:
            raise Exception("HTTPException: execute job error")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.var is not None:
                        # Pull the pickled remote variable into local scope.
                        self.shell.user_ns[user_input.var] = \
                            self.linkis_client.load_pickle_var(user_input.var)
                    if user_input.upload is not None:
                        self.linkis_client.delete_upload_var(user_input.upload)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException: get_execute_result error")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    def sql(self, line, cell="", local_ns=None):
        """Run each line of ``cell`` as a HiveContext SQL statement."""
        user_input = parse_argstring(self.sql, line)
        # Bootstrap a HiveContext on the cluster when one is not defined.
        sql_code = ('if "hiveContext" not in locals().keys():\n '
                    '\tfrom pyspark.sql import HiveContext\n'
                    '\thiveContext = HiveContext(sc)\n')
        # NOTE(review): the SQL text is spliced into generated Python —
        # double quotes inside a statement would break or alter the
        # generated code (injection risk); consider escaping.
        for statement in cell.split("\n"):
            if statement != "":
                sql_code = sql_code + 'hiveContext.sql("' + statement + '").show()' + '\n'
        print(sql_code)
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", sql_code, run_type="python")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        # BUG FIX: the download path was previously dropped.
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="output var of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    def sparksql(self, line, cell="", local_ns=None):
        """Run ``cell`` directly as Spark SQL on Linkis."""
        user_input = parse_argstring(self.sparksql, line)
        code = cell
        status, exec_id, task_id = self.linkis_client.execute("spark", code,
                                                              run_type="sql")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if not status or result != "Success":
                            raise Exception("Save Error, result: " + result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listjob(self, line, cell="", local_ns=None):
        """List job history, optionally binding it to an output variable."""
        user_input = parse_argstring(self.listjob, line)
        job_list = self.linkis_client.job_history()
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = job_list
        else:
            return job_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def progress(self, line, cell="", local_ns=None):
        """Print the progress of the job with the given exec id."""
        # BUG FIX: previously parsed against self.kill.
        user_input = parse_argstring(self.progress, line)
        status, result = self.linkis_client.progress(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def kill(self, line, cell="", local_ns=None):
        """Kill the job with the given exec id."""
        kill_input = parse_argstring(self.kill, line)
        status, result = self.linkis_client.kill(kill_input.id)
        print(result)
        if status:
            print("Succeed")
        else:
            print("ERROR")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def log(self, line, cell="", local_ns=None):
        """Print the log of the job with the given exec id."""
        # BUG FIX: previously parsed against self.kill.
        user_input = parse_argstring(self.log, line)
        status, result = self.linkis_client.log(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def status(self, line, cell="", local_ns=None):
        """Print the status of the job with the given exec id."""
        # BUG FIX: previously parsed against self.kill.
        user_input = parse_argstring(self.status, line)
        status, result = self.linkis_client.status(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listengine(self, line, cell="", local_ns=None):
        """List engines, optionally binding them to an output variable."""
        user_input = parse_argstring(self.listengine, line)
        status, engine_list = self.linkis_client.engines()
        if not status:
            raise Exception("Http Exception")
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = engine_list
        else:
            return engine_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--instance", type=str, default=None, help="Instance of Engine ")
    def enginekill(self, line, cell="", local_ns=None):
        """Kill the engine running on the given instance."""
        user_input = parse_argstring(self.enginekill, line)
        status, result = self.linkis_client.engine_kill(user_input.instance)
        if status:
            print("Success")
        else:
            raise Exception("HTTPException")

    # TODO: improve log display by splitting it into a detailed log and
    # key information (translated from the original Chinese note).
    def log_detail(self):
        pass

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    def flashcookies(self, line, cell="", local_ns=None):
        """Refresh the cached Linkis gateway cookies."""
        # BUG FIX: previously parsed against self.listengine.
        parse_argstring(self.flashcookies, line)
        self.linkis_client.refresh_cookies()
        self.ipython_display.display("Refresh Cookies Successful.")


def load_ipython_extension(ip):
    """Entry point for ``%load_ext``: register the magics with IPython."""
    ip.register_magics(LinkisMagic)
def __init__(self, display=None):
    """Store the injected display helper, defaulting to IpythonDisplay."""
    self.display = IpythonDisplay() if display is None else display
class SparkKernelBase(IPythonKernel):
    """Base class for Spark wrapper kernels.

    Subclasses provide the kernel metadata; this class routes executed
    cells through the sparkmagic magics machinery, switches the session
    language at startup, and funnels fatal startup errors into the next
    executed cell (IPython crashes if ``__init__`` raises directly).
    """

    def __init__(self, implementation, implementation_version, language,
                 language_version, language_info, session_language,
                 user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(
            self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        # Default parser when the caller supplies none.
        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        # In test mode the magics/language/auto-viz setup is skipped.
        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True,
                   user_expressions=None, allow_stdin=False):
        # Wrap execution so unexpected exceptions complete the cell
        # cleanly; a queued fatal error preempts the user's code.
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()
            return self._do_execute(code, silent, store_history,
                                    user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        # NOTE(review): session deletion is intentionally disabled in this
        # variant — confirm whether the session should outlive the kernel.
        # self._delete_session()
        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions,
                    allow_stdin):
        # Let the user-code parser rewrite the cell before execution.
        code_to_run = self.user_code_parser.get_code_to_run(code)
        res = self._execute_cell(code_to_run, silent, store_history,
                                 user_expressions, allow_stdin)
        return res

    def _load_magics_extension(self):
        # Load the sparkmagic magics; a failure here is fatal.
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(
            register_magics_code, True, False, shutdown_if_error=True,
            log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        # Switch the remote session language via a hidden cell magic.
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(
            self.session_language)
        self._execute_cell(
            register_magics_code, True, False, shutdown_if_error=True,
            log_if_error="Failed to change language to {}.".format(
                self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        # Hook the autovizwidget formatter so DataFrames auto-render.
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)
        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(
            register_auto_viz_code, True, False, shutdown_if_error=True,
            log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        # Best-effort session teardown; errors are not escalated.
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True,
                      user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        # Execute a cell, optionally converting an error reply into a
        # fatal kernel error with the supplied log message.
        reply_content = self._execute_cell_for_user(code, silent,
                                                    store_history,
                                                    user_expressions,
                                                    allow_stdin)
        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(
                    log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)
        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True,
                               user_expressions=None, allow_stdin=False):
        # Delegate to the stock IPython kernel execution.
        return super(SparkKernelBase, self).do_execute(code, silent,
                                                       store_history,
                                                       user_expressions,
                                                       allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect.

        Call this and return the value it returns when there's some sort
        of error preventing the user's cell from executing; this will
        register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        # Log and surface a (non-fatal) error to the notebook user.
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is
        executed; does not raise an error immediately. We use this for
        errors that happen on kernel startup, since IPython crashes if we
        throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
import subprocess import re import random import urllib3.util from hdijupyterutils.ipythondisplay import IpythonDisplay import ipyvuetify as v from google.cloud import dataproc_v1beta2 import google.auth.transport.requests from google.auth import _cloud_sdk from google.auth.exceptions import UserAccessTokenError from google.oauth2.credentials import Credentials from sparkmagic.auth.customauth import Authenticator from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException import googledataprocauthenticator.utils.constants as constants ipython_display = IpythonDisplay() def list_credentialed_user_accounts(): """Load all of user's credentialed accounts with ``gcloud auth list`` command. Returns: Sequence[str]: each value is a str of one of the users credentialed accounts Raises: sparkmagic.livyclientlib.BadUserConfigurationException: if gcloud cannot be invoked """ accounts_json = "" if os.name == "nt": command = constants.CLOUD_SDK_WINDOWS_COMMAND else:
class SparkMagicBase(Magics):
    """Base class for Spark magics in this fork: finds or creates a Livy
    session for the current kernel and runs Spark / SQL code for the user."""

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def _get_session_name_by_session(self, session):
        """Register `session` with the session manager if it is unknown.

        Returns the session's name when it was just registered (and marks it
        started), the falsy name when the session carries none, and None when
        the manager already knew the session.
        NOTE(review): returning None for an already-registered session looks
        suspicious - the caller compares the result against the generated
        session name, so known sessions can never match. Confirm intended.
        """
        session_name = self.spark_controller.session_manager.get_session_name_by_id(
            session.id)
        # If the session is not registered yet, activate it and add it to the
        # session list.
        if not session_name:
            session_name = session.session_name
            if session_name:
                self.spark_controller.session_manager.add_session(
                    session_name, session)
                session.already_start()
                return session_name
            else:
                # The session has no name either; propagate the falsy value.
                return session_name
        return None

    def init_livy_session(self, language="python"):
        '''
        Automatically initialize the Livy session (e.g. before running SQL).
        :return: the name of the session that is ready to use
        '''
        return self.__get_or_create_session(language)

    def __get_or_create_session(self, language):
        """Find a healthy Livy session matching this kernel, or create one.

        Sessions are matched by kind, session language and the name derived
        from the kernel instance id; sessions in a FINAL state are recreated.
        """
        proxy_user = getpass.getuser()
        self.session_language = language
        endpoint = build_endpoint(self.session_language)
        kernel_instance_id = id(self.shell.kernel)
        session_name_seleted = self.spark_controller.generate_livy_session_name(
            kernel_instance_id)
        properties = conf.get_session_properties(self.session_language)
        properties["proxyUser"] = proxy_user
        properties["session_language"] = self.session_language
        properties["session_name"] = session_name_seleted
        session_info_list = self.spark_controller.get_all_sessions_endpoint(
            endpoint)
        for session in session_info_list:
            # The session kind must match.
            if session.kind != properties['kind']:
                continue
            # Distinguish pyspark from pyspark3.
            if session.session_language != properties['session_language']:
                continue
            session_name = self._get_session_name_by_session(session)
            if session_name == session_name_seleted:
                if session.status in constants.HEALTHY_SESSION_STATUS:
                    return session_name_seleted
                elif session.status in constants.FINAL_STATEMENT_STATUS:
                    # FINAL, recreate new session
                    self.spark_controller.add_session(session_name_seleted,
                                                      endpoint, False,
                                                      properties)
                    return session_name_seleted
        else:
            # If Livy has no matching session, create one.
            # (for/else: runs whenever the loop finishes without returning.)
            self.spark_controller.add_session(session_name_seleted, endpoint,
                                              False, properties)
            return session_name_seleted

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell` on the Livy session and render the output.

        dict outputs are converted to a pandas DataFrame and rendered as HTML;
        strings (and anything else) are written as plain text. When
        `output_var` is set, the sampled result is bound into the user
        namespace.
        """
        (success, out) = self.spark_controller.run_command(Command(cell),
                                                           session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            if isinstance(out, string_types):
                self.ipython_display.write(out)
            elif isinstance(out, dict):
                df = convert_data_struct_to_dataframe(out)
                html = df.fillna('NULL').astype(str).to_html(notebook=True)
                self.ipython_display.html(html)
            else:
                self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                # NOTE(review): run_command returned a (success, out) pair
                # above; verify what it returns for a store command before
                # relying on `df` being a DataFrame here.
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        # Thin factory; kept separate so tests can stub command creation.
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        """Run a SQL query; optionally bind the result DataFrame and/or
        suppress the return value (`quiet`)."""
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        # Thin factory; kept separate so tests can stub query creation.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table of the sessions, or a placeholder when empty."""
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
class SparkMagicBase(Magics):
    """Base class for Spark magics (maggy-aware variant): sends local
    variables to Spark, runs cells with mimetype-aware display, and guards
    maggy `lagom` usage with a heartbeat client."""

    # Accepted values for the -t flag of %%send_to_spark.
    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug(u'Initialized spark magics.')
        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        """Send a local str or pandas DataFrame into the Spark session.

        Raises BadUserDataException for a missing/None variable or an
        unsupported -t type.
        """
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            # Default: reuse the local variable's name on the Spark side.
            output_variable_name = input_variable_name
        if not max_rows:
            max_rows = conf.default_maxrows()
        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               input_variable_value,
                                               output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 input_variable_value,
                                                 output_variable_name,
                                                 max_rows)
        else:
            raise BadUserDataException(
                u'Invalid or incorrect -t type. Available are: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result,
         mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, output_variable_name))

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell`, render text/HTML by mimetype, optionally store result.

        On failure, optionally shuts the session down (per configuration) and
        raises SparkStatementException.
        """
        (success, out,
         mimetype) = self.spark_controller.run_command(Command(cell),
                                                       session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()
            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    self.ipython_display.html(out)
                else:
                    self.ipython_display.write(out)
            else:
                self.ipython_display.display(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell`, wrapping maggy `.lagom` calls with a heartbeat Client."""
        if "lagom as" in cell:
            self.ipython_display.send_error(
                "You are not allowed to do the following: 'import maggy.experiment.lagom as ...'. Please, just use 'import maggy.experiment as experiment' (or something else)"
            )
            # NOTE(review): bare `raise` with no active exception raises
            # RuntimeError at runtime - confirm whether a specific exception
            # was intended here.
            raise
        elif ".lagom" in cell:
            # NOTE(review): `self.session_name` is never assigned in this
            # class - presumably set elsewhere in the fork; verify, otherwise
            # this is an AttributeError (parameter `session_name` may have
            # been intended).
            client = Client(self.spark_controller, self.session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            except:
                raise
            finally:
                # 4. Kill thread before leaving current scope
                client.stop()
                try:
                    client.close()
                except:
                    # Best effort: the maggy server may already have closed
                    # the socket.
                    if DEBUG:
                        print("Socket already closed by maggy server.")
                    pass
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        # Thin factory; kept separate so tests can stub command creation.
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        """Run a SQL query; optionally bind the result DataFrame and/or
        suppress the return value (`quiet`)."""
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        # Thin factory; kept separate so tests can stub query creation.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table of the sessions, or a placeholder when empty."""
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
class SparkMagicBase(Magics):
    """Base class for Spark magics (simpler maggy-aware variant): plain-text
    output rendering plus a heartbeat-guarded path for maggy `.lagom` cells."""

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell`, write output as text, optionally store the result df."""
        (success, out) = self.spark_controller.run_command(Command(cell),
                                                           session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell`, wrapping maggy `.lagom` calls with a heartbeat Client."""
        if "lagom as" in cell:
            self.ipython_display.send_error(
                "You are not allowed to do the following: 'import maggy.experiment.lagom as ...'. Please, just use 'import maggy.experiment as experiment' (or something else)"
            )
            # NOTE(review): bare `raise` with no active exception raises
            # RuntimeError at runtime - confirm whether a specific exception
            # was intended here.
            raise
        elif ".lagom" in cell:
            # NOTE(review): `self.session_name` is never assigned in this
            # class - presumably set elsewhere; verify (parameter
            # `session_name` may have been intended).
            client = Client(self.spark_controller, self.session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            except:
                raise
            finally:
                # 4. Kill thread before leaving current scope
                client.stop()
                try:
                    client.close()
                except:
                    # Best effort: the maggy server may already have closed
                    # the socket.
                    if DEBUG:
                        print("Socket already closed by maggy server.")
                    pass
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        # Thin factory; kept separate so tests can stub command creation.
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        """Run a SQL query; optionally bind the result DataFrame and/or
        suppress the return value (`quiet`)."""
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        # Thin factory; kept separate so tests can stub query creation.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table of the sessions, or a placeholder when empty."""
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
class SparkKernelBase(IPythonKernel):
    """Jupyter kernel base that proxies user code to a remote Livy/Spark
    session via sparkmagic magics, loading the magics extensions and
    initializing the Livy session at startup."""

    def __init__(self, implementation, implementation_version, language,
                 language_version, language_info, session_language,
                 user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            # Load sparkmagic.magics and create the Livy session when the
            # kernel starts.
            self._load_spark_magics_extension()
            self._init_livy_session()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def _is_sql_filter(self, code):
        """Return True when `code` is a SQL statement that must be blocked
        (`show databases` / `use ...`) while SQL restriction is enabled."""
        if conf.is_sql_restrict():
            if re.search(r'\s*show\s+databases', code.lower()):
                return True
            if re.search(r'\s*use\s+', code.lower()):
                return True
        return False

    def do_execute(self, code, silent, store_history=True,
                   user_expressions=None, allow_stdin=False):
        """Jupyter entry point for cell execution; filters restricted SQL and
        replays any queued fatal startup error before running user code."""
        def f(self):
            if self._is_sql_filter(code):
                # Restricted SQL: inform the user (message is user-facing and
                # intentionally in Chinese) and complete the cell as a no-op.
                self.ipython_display.write("已为您选择好专属数据库, 直接使用show tables 试试看")
                return self._complete_cell()
            if self._fatal_error is not None:
                return self._repeat_fatal_error()
            return self._do_execute(code, silent, store_history,
                                    user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()
        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions,
                    allow_stdin):
        # Let the parser rewrite the raw cell (e.g. wrap it in a magic) first.
        code_to_run = self.user_code_parser.get_code_to_run(code)
        res = self._execute_cell(code_to_run, silent, store_history,
                                 user_expressions, allow_stdin)
        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _load_spark_magics_extension(self):
        '''
        Load sparkmagic.magics, equivalent to executing
        %load_ext sparkmagic.magics.
        :return:
        '''
        register_spark_magics_code = "%load_ext sparkmagic.magics"
        self._execute_cell(register_spark_magics_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to load the Spark Magics library.")
        self.logger.debug("Loaded sparkmagic.magics")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _init_livy_session(self):
        '''
        Session initialization is not performed directly by this class; it is
        delegated to the kernel magics via a hidden cell magic.
        :return:
        '''
        register_magics_code = "%%_do_not_call_init_livy_session -i {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to init livy session: {}.".format(self.session_language))
        self.logger.debug("Init livy session.")

    def _register_auto_viz(self):
        # Route pandas DataFrame display through the autoviz widget.
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)
        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True,
                      user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        """Run `code` through the underlying IPython kernel; optionally treat
        an error reply as fatal (queue + raise via _abort_with_fatal_error)."""
        reply_content = self._execute_cell_for_user(code, silent, store_history,
                                                    user_expressions,
                                                    allow_stdin)
        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)
        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True,
                               user_expressions=None, allow_stdin=False):
        # Delegate to the plain IPython kernel implementation.
        return super(SparkKernelBase, self).do_execute(code, silent,
                                                       store_history,
                                                       user_expressions,
                                                       allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the
        value it returns when there's some sort of error preventing the user's
        cell from executing; this will register the cell from the Jupyter UI
        as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is
        executed; does not raise an error immediately. We use this for errors
        that happen on kernel startup, since IPython crashes if we throw an
        exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()

    @gen.coroutine
    def complete_request(self, stream, ident, parent):
        """Handle a complete_request kernel message (tornado coroutine)."""
        content = parent['content']
        code = content['code']
        cursor_pos = content['cursor_pos']
        matches = yield gen.maybe_future(self.do_complete(code, cursor_pos))
        matches = json_clean(matches)
        # NOTE(review): the send() result is unused; kept for parity with
        # ipykernel's handler.
        completion_msg = self.session.send(stream, 'complete_reply', matches,
                                           parent, ident)

    def _experimental_do_complete(self, code, cursor_pos):
        """
        Experimental completions from IPython, using livy completion.
        """
        code = code.strip()
        if cursor_pos is None:
            cursor_pos = len(code)
        matches = []
        with provisionalcompleter():
            session_name = self.spark_controller.generate_livy_session_name(
                id(self))
            endpoint = build_endpoint(self.session_language)
            session_info_list = self.spark_controller.get_all_sessions_endpoint(
                endpoint)
            session_id = None
            for session in session_info_list:
                if session.session_name == session_name:
                    session_id = session.id
            if session_id:
                # Only complete the cursor_line
                cursor_line, cursor_column = position_to_cursor(code,
                                                                cursor_pos)
                lines = code.split("\n")
                completion_line = lines[cursor_line]
                before_lines = lines[:cursor_line]
                if len(lines) > 1 and cursor_line > 0:
                    # Rebase the cursor to be relative to the current line
                    # (the -1 accounts for the newline before it).
                    real_cursor_pos = cursor_pos - len("\n".join(before_lines)) - 1
                else:
                    real_cursor_pos = cursor_pos
                http_client = self.spark_controller._http_client(endpoint)
                kind = conf.get_livy_kind(self.session_language)
                res_completions = http_client.post_completion(session_id, kind,
                                                              completion_line,
                                                              real_cursor_pos)
                matches = res_completions.get("candidates", [])
        if matches:
            s = self.__get_cursor_start(code, cursor_pos, matches[0])
        else:
            s = cursor_pos
        res = {
            'matches': matches,
            'cursor_end': cursor_pos,
            'cursor_start': s,
            'metadata': {},
            'status': 'ok'
        }
        return res

    def __get_cursor_start(self, code, cursor_pos, match):
        """Walk backwards from the cursor to find where the word being
        completed starts, so the client replaces only that prefix of `match`."""
        before_code = code[:cursor_pos]
        before_code_rev = before_code[::-1]
        bucket = []
        for c in before_code_rev:
            if len(bucket) >= len(match):
                break
            if re.match(r"\w", c):
                bucket.insert(0, c)
            else:
                # Stop at the first non-word character.
                break
            if c == match[0]:
                bucket_len = len(bucket)
                completion_match_prefix = "".join(bucket)
                if completion_match_prefix == match[:bucket_len]:
                    return cursor_pos - bucket_len
        return cursor_pos

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        """Handle an apply_request by delegating to the project's handler.
        NOTE(review): `dispath_request` spelling matches the external
        ApplyRequestHandler API - confirm before renaming."""
        from sparkmagic.messages_api.apply_request import ApplyRequestHandler
        result_buf = []
        reply_content = ApplyRequestHandler(self).dispath_request(content)
        return reply_content, result_buf
class SparkMagicBase(Magics):
    """IPython magics base: ships local variables into the Spark session and
    runs Spark cells / SQL queries, rendering results through pluggable
    output handlers."""

    # Accepted values for the -t flag of %%send_to_spark.
    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # The parent constructor must run first so magic registration works.
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug(u'Initialized spark magics.')
        events = SparkEvents() if spark_events is None else spark_events
        events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        """Send a local str or pandas DataFrame into the Spark session.

        Raises BadUserDataException for a missing/None variable or an
        unsupported -t type.
        """
        try:
            local_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if local_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        # Default the Spark-side name to the local name, and the row cap to
        # the configured maximum.
        target_name = output_variable_name or input_variable_name
        row_cap = max_rows or conf.default_maxrows()

        requested_type = var_type.lower()
        if requested_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               local_value, target_name)
        elif requested_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 local_value, target_name,
                                                 row_cap)
        else:
            raise BadUserDataException(
                u'Invalid or incorrect -t type. Available are: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        success, result, mime_type = self.spark_controller.run_command(
            command, None)
        if success:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, target_name))
        else:
            self.ipython_display.send_error(result)

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce,
                      output_handler=None):
        """Run `cell` on the Livy session and render the result.

        HTML output goes through the handler's html sink, other strings
        through text, everything else through default. On failure the session
        may be cleaned up (per configuration) before SparkStatementException
        is raised. When `output_var` is set, the sampled result is bound into
        the user namespace.
        """
        handler = output_handler or SparkOutputHandler(
            html=self.ipython_display.html,
            text=self.ipython_display.write,
            default=self.ipython_display.display)

        success, out, mimetype = self.spark_controller.run_command(
            Command(cell), session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()
            raise SparkStatementException(out)

        if isinstance(out, string_types):
            if mimetype == MIMETYPE_TEXT_HTML:
                handler.html(out)
            else:
                handler.text(out)
        else:
            handler.default(out)

        if output_var is not None:
            store_cmd = self._spark_store_command(output_var, samplemethod,
                                                  maxrows, samplefraction,
                                                  coerce)
            df = self.spark_controller.run_command(store_cmd, session_name)
            self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        # Thin factory; kept separate so tests can stub command creation.
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        """Run a SQL query; optionally bind the result DataFrame and/or
        suppress the return value (`quiet`)."""
        query = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                               coerce)
        df = self.spark_controller.run_sqlquery(query, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        return None if quiet else df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        # Thin factory; kept separate so tests can stub query creation.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table of the sessions, or a placeholder when empty."""
        if not info_sessions:
            self.ipython_display.html(u'No active sessions.')
            return
        ordered = sorted(info_sessions, key=lambda s: s.id)
        self.ipython_display.html(
            get_sessions_info_html(ordered, current_session_id))