class SparkMagicBase(Magics):
    """Base class for Spark magics.

    Wires up logging, IPython display, and the Spark controller, and
    provides the shared execute/store/SQL helpers used by subclasses.
    """

    def __init__(self, shell, data=None, spark_events=None):
        # The IPython Magics parent constructor must run first.
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")

        # Allow injection of an events sink for testing; default to the real one.
        spark_events = SparkEvents() if spark_events is None else spark_events
        spark_events.emit_library_loaded_event()

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell` on the named session and render the result.

        On failure the error is sent to the display. On success the output
        is rendered (HTML when the mimetype says so), and when `output_var`
        is given the named Spark variable is pulled back into the local
        user namespace.
        """
        success, out, mimetype = self.spark_controller.run_command(
            Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
            return

        # String output may carry an HTML payload; anything else is
        # handed to the rich display machinery as-is.
        if not isinstance(out, string_types):
            self.ipython_display.display(out)
        elif mimetype == MIMETYPE_TEXT_HTML:
            self.ipython_display.html(out)
        else:
            self.ipython_display.write(out)

        if output_var is not None:
            store_cmd = self._spark_store_command(
                output_var, samplemethod, maxrows, samplefraction, coerce)
            self.shell.user_ns[output_var] = self.spark_controller.run_command(
                store_cmd, session_name)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows,
                             samplefraction, coerce):
        # Builds the SparkStoreCommand used to fetch `output_var` locally.
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        """Run a SQL query on `session`.

        Stores the resulting dataframe under `output_var` when given, and
        returns it unless `quiet` is truthy.
        """
        query = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                               coerce)
        df = self.spark_controller.run_sqlquery(query, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        return None if quiet else df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        # Builds the SQLQuery object for execute_sqlquery.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table of the given sessions, ordered by id."""
        if not info_sessions:
            self.ipython_display.html(u'No active sessions.')
            return
        ordered = sorted(info_sessions, key=lambda s: s.id)
        self.ipython_display.html(
            get_sessions_info_html(ordered, current_session_id))
class LinkisMagic(Magics):
    """IPython magics that submit Spark/PySpark/SQL jobs to a Linkis
    gateway and expose job/engine management helpers.

    All network work is delegated to ``LinkisClient``; these magics only
    parse arguments, assemble code, and route results into the user
    namespace or onto disk.
    """

    def __init__(self, shell, data=None, widget=None):
        super(LinkisMagic, self).__init__(shell)
        self.ipython_display = IpythonDisplay()
        self.data = data
        self.linkis_client = LinkisClient()

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    def spark(self, line, cell="", local_ns=None):
        """Run the cell as Scala Spark code on Linkis."""
        user_input = parse_argstring(self.spark, line)
        code = cell
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", code, run_type="spark")
        if not status:
            raise Exception("HTTPException")
        status, exec_status, log, result = self.linkis_client.get_execute_result(
            exec_id, task_id)
        if not status:
            raise Exception("HTTPException: get_execute_result error")
        if exec_status == "Failed":
            # Surface only the failure-relevant log sections.
            print(log["keyword"])
            print(log["log"])
            return
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = result
        if user_input.path is not None:
            status, result = self.linkis_client.download_by_pipeline_engine(
                task_id, user_input.path)
            if status and result == "None":
                raise Exception("Save Error, Result dir is None")
            elif not status or result != "Success":
                raise Exception("Save Error, Result: " + result)
        # -q is a string flag; only an explicit "False"/"false" echoes the result.
        if user_input.quiet == "False" or user_input.quiet == "false":
            return result

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    @argument("-v", "--var", type=str, default=None, help="transport var from spark cluster to local python ")
    @argument("-u", "--upload", type=str, default=None, help="transport var from local python to spark cluster")
    def pyspark(self, line, cell="", local_ns=None):
        """Run the cell as PySpark code, optionally shuttling pickled
        variables between the local kernel and the cluster."""
        user_input = parse_argstring(self.pyspark, line)
        pyspark_code = cell
        if user_input.upload is not None:
            # Prepend cluster-side unpickle code, then ship the local value.
            pyspark_code = self.linkis_client.pyspark_load_pickle_code(
                user_input.upload) + pyspark_code
            self.linkis_client.save_pickle_file(
                user_input.upload, self.shell.user_ns[user_input.upload])
        if user_input.var is not None:
            # Append cluster-side pickle code so -v can be fetched afterwards.
            pyspark_code = pyspark_code + self.linkis_client.define_pickel_code(
                user_input.var)
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", pyspark_code, run_type="python")
        if not status:
            raise Exception("HTTPException: execute job error")
        status, exec_status, log, result = self.linkis_client.get_execute_result(
            exec_id, task_id)
        if not status:
            raise Exception("HTTPException: get_execute_result error")
        if exec_status == "Failed":
            print(log["keyword"])
            print(log["log"])
            return
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = result
        if user_input.path is not None:
            status, result = self.linkis_client.download_by_pipeline_engine(
                task_id, user_input.path)
            if status and result == "None":
                raise Exception("Save Error, Result dir is None")
            elif not status or result != "Success":
                raise Exception("Save Error, Result: " + result)
        if user_input.var is not None:
            self.shell.user_ns[user_input.var] = self.linkis_client.load_pickle_var(
                user_input.var)
        if user_input.upload is not None:
            self.linkis_client.delete_upload_var(user_input.upload)
        if user_input.quiet == "False" or user_input.quiet == "false":
            return result

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    def sql(self, line, cell="", local_ns=None):
        """Run each non-empty line of the cell as a Hive SQL statement
        via generated PySpark driver code."""
        user_input = parse_argstring(self.sql, line)
        # Generated driver: lazily create a HiveContext, then .show() each query.
        sql_code = ('if "hiveContext" not in locals().keys():\n'
                    ' \tfrom pyspark.sql import HiveContext\n'
                    '\thiveContext = HiveContext(sc)\n')
        for statement in cell.split("\n"):
            if "" != statement:
                sql_code = sql_code + 'hiveContext.sql("' + statement + '").show()' + '\n'
        print(sql_code)
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", sql_code, run_type="python")
        if not status:
            raise Exception("HTTPException")
        status, exec_status, log, result = self.linkis_client.get_execute_result(
            exec_id, task_id)
        if not status:
            raise Exception("HTTPException")
        if exec_status == "Failed":
            print(log["keyword"])
            print(log["log"])
            return
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = result
        if user_input.path is not None:
            # BUG FIX: the path was previously dropped from this call, so every
            # %sql -p download failed; pass it through like the sibling magics.
            status, result = self.linkis_client.download_by_pipeline_engine(
                task_id, user_input.path)
            if status and result == "None":
                raise Exception("Save Error, Result dir is None")
            elif not status or result != "Success":
                raise Exception("Save Error, Result: " + result)
        if user_input.quiet == "False" or user_input.quiet == "false":
            return result

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="output var of Job ")
    @argument("-p", "--path", type=str, default=None, help="Download output in path ")
    @argument("-q", "--quiet", type=str, default=True, help="Do not display result on console")
    def sparksql(self, line, cell="", local_ns=None):
        """Run the cell as Spark SQL on Linkis."""
        user_input = parse_argstring(self.sparksql, line)
        code = cell
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", code, run_type="sql")
        if not status:
            raise Exception("HTTPException")
        status, exec_status, log, result = self.linkis_client.get_execute_result(
            exec_id, task_id)
        if not status:
            raise Exception("HTTPException")
        if exec_status == "Failed":
            print(log["keyword"])
            print(log["log"])
            return
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = result
        if user_input.path is not None:
            status, result = self.linkis_client.download_by_pipeline_engine(
                task_id, user_input.path)
            if not status or result != "Success":
                raise Exception("Save Error, result: " + result)
        if user_input.quiet == "False" or user_input.quiet == "false":
            return result

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listjob(self, line, cell="", local_ns=None):
        """List job history; store under -o or return it."""
        user_input = parse_argstring(self.listjob, line)
        job_list = self.linkis_client.job_history()
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = job_list
        else:
            return job_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def progress(self, line, cell="", local_ns=None):
        """Print the progress of the job with the given exec id."""
        # BUG FIX: previously parsed against self.kill's argspec.
        user_input = parse_argstring(self.progress, line)
        status, result = self.linkis_client.progress(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def kill(self, line, cell="", local_ns=None):
        """Kill the job with the given exec id."""
        kill_input = parse_argstring(self.kill, line)
        status, result = self.linkis_client.kill(kill_input.id)
        print(result)
        if status:
            print("Succeed")
        else:
            print("ERROR")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def log(self, line, cell="", local_ns=None):
        """Print the log of the job with the given exec id."""
        # BUG FIX: previously parsed against self.kill's argspec.
        user_input = parse_argstring(self.log, line)
        status, result = self.linkis_client.log(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def status(self, line, cell="", local_ns=None):
        """Print the status of the job with the given exec id."""
        # BUG FIX: previously parsed against self.kill's argspec.
        user_input = parse_argstring(self.status, line)
        status, result = self.linkis_client.status(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listengine(self, line, cell="", local_ns=None):
        """List engines; store under -o or return the list."""
        user_input = parse_argstring(self.listengine, line)
        status, engine_list = self.linkis_client.engines()
        if not status:
            raise Exception("Http Exception")
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = engine_list
        else:
            return engine_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--instance", type=str, default=None, help="Instance of Engine ")
    def enginekill(self, line, cell="", local_ns=None):
        """Kill the engine running on the given instance."""
        user_input = parse_argstring(self.enginekill, line)
        status, result = self.linkis_client.engine_kill(user_input.instance)
        if status:
            print("Success")
        else:
            raise Exception("HTTPException")

    # TODO: improve log display by splitting verbose logs from key messages.
    def log_detail(self):
        pass

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    def flashcookies(self, line, cell="", local_ns=None):
        """Refresh the Linkis session cookies."""
        # BUG FIX: previously parsed against self.listengine's argspec.
        user_input = parse_argstring(self.flashcookies, line)
        self.linkis_client.refresh_cookies()
        self.ipython_display.display("Refresh Cookies Successful.")


def load_ipython_extension(ip):
    """IPython extension entry point: register the Linkis magics."""
    ip.register_magics(LinkisMagic)
class SparkMagicBase(Magics):
    """Base class for Spark magics: session execution, SQL queries,
    local-to-Spark variable transfer, and endpoint info rendering."""

    # Accepted -t values for do_send_to_spark.
    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [_STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug(u'Initialized spark magics.')

        # spark_events is injectable for testing; default to the real sink.
        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        """Send a local variable to the Spark session.

        var_type must (case-insensitively) be one of
        _ALLOWED_LOCAL_TO_SPARK_TYPES; raises BadUserDataException when the
        variable is missing, None, or the type is not recognized.
        """
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(u'Value of {} is None!'.format(input_variable_name))

        # Default the remote name to the local name, and the row cap to the
        # configured default, when not supplied.
        if not output_variable_name:
            output_variable_name = input_variable_name
        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name, input_variable_value, output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name, input_variable_value, output_variable_name, max_rows)
        else:
            raise BadUserDataException(u'Invalid or incorrect -t type. Available are: [{}]'.format(u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result, mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(u'Successfully passed \'{}\' as \'{}\' to Spark'
                                       u' kernel'.format(input_variable_name, output_variable_name))

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        """Run `cell` on the named session and display the result.

        On failure, optionally tears the session down (per configuration)
        and raises SparkStatementException. On success, renders the output
        (HTML when the mimetype indicates it) and, when `output_var` is
        given, stores the fetched variable in the user namespace.
        """
        (success, out, mimetype) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()
            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    self.ipython_display.html(out)
                else:
                    self.ipython_display.write(out)
            else:
                self.ipython_display.display(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command, session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows,
                             samplefraction, coerce):
        # Builds the command that fetches `output_var` from Spark.
        return SparkStoreCommand(output_var, samplemethod, maxrows, samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        """Run a SQL query; store the dataframe under `output_var` when
        given, and return it unless `quiet` is truthy."""
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction, coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        # Builds the SQLQuery object for execute_sqlquery.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction, coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        """Render an HTML table of the given sessions, ordered by id."""
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')