Code example #1
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

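    # Run the cell in the given session, render the result (HTML, plain text,
    # or a rich object), and optionally store a sampled DataFrame in
    # output_var in the user namespace.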
    def execute_spark(self, cell, output_var, samplemethod, maxrows, samplefraction, session_name, coerce):
        (success, out, mimetype) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    self.ipython_display.html(out)
                else:
                    self.ipython_display.write(out)
            else:
                self.ipython_display.display(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command, session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce):
        return SparkStoreCommand(output_var, samplemethod, maxrows, samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction, coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction, coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
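
A minimal sketch of how a subclass might expose SparkMagicBase.execute_spark as a cell magic. The class name SimpleSparkMagic, the %spark magic name, and the "my-session" session name are illustrative assumptions, not taken from the example above:

from IPython.core.magic import magics_class, cell_magic

@magics_class
class SimpleSparkMagic(SparkMagicBase):
    @cell_magic
    def spark(self, line, cell):
        # Run the cell in a fixed, hypothetical session; no output variable
        # and no sampling options.
        self.execute_spark(cell, output_var=None, samplemethod=None,
                           maxrows=None, samplefraction=None,
                           session_name="my-session", coerce=None)

def load_ipython_extension(ip):
    ip.register_magics(SimpleSparkMagic)
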
Code example #2
File: linkismagic.py  Project: zwx-master/Prophecis
class LinkisMagic(Magics):
    def __init__(self, shell, data=None, widget=None):
        super(LinkisMagic, self).__init__(shell)
        self.ipython_display = IpythonDisplay()
        self.data = data
        #        if widget is None:
        #            widget = MagicsControllerWidget(self.spark_controller, IpyWidgetFactory(), self.ipython_display)
        #        self.manage_widget = widget
        self.linkis_client = LinkisClient()

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    def spark(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.spark, line)

        code = cell
        status, exec_id, task_id = self.linkis_client.execute("spark",
                                                              code,
                                                              run_type="spark")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException: get_execute_result error")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    @argument("-v",
              "--var",
              type=str,
              default=None,
              help="transport var from spark cluster to local python ")
    @argument("-u",
              "--upload",
              type=str,
              default=None,
              help="transport var from local python to spark cluster")
    def pyspark(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.pyspark, line)
        pyspark_code = cell

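        # -u/--upload: pickle a local variable to a file and prepend code
        # that un-pickles it on the cluster side.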
        if user_input.upload is not None:
            pyspark_code = self.linkis_client.pyspark_load_pickle_code(
                user_input.upload) + pyspark_code
            self.linkis_client.save_pickle_file(
                user_input.upload, self.shell.user_ns[user_input.upload])

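        # -v/--var: append code that pickles a cluster-side variable so it
        # can be pulled back into the local namespace after execution.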
        if user_input.var is not None:
            pyspark_code = pyspark_code + self.linkis_client.define_pickel_code(
                user_input.var)

        status, exec_id, task_id = self.linkis_client.execute(
            "spark", pyspark_code, run_type="python")
        if not status:
            raise Exception("HTTPException: execute job error")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.var is not None:
                        self.shell.user_ns[user_input.var] = \
                            self.linkis_client.load_pickle_var(user_input.var)
                    if user_input.upload is not None:
                        self.linkis_client.delete_upload_var(user_input.upload)
                    # self.ipython_display.display(result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException: get_execute_result error")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    def sql(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.sql, line)
        sql_code = 'if "hiveContext" not in locals().keys():\n \
        \tfrom pyspark.sql import HiveContext\n\
        \thiveContext = HiveContext(sc)\n'

        cell_list = cell.split("\n")
        for i in range(len(cell_list)):
            if "" != cell_list[i]:
                sql_code = sql_code + 'hiveContext.sql("' + cell_list[
                    i] + '").show()' + '\n'
        print(sql_code)
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", sql_code, run_type="python")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    # self.ipython_display.display(result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o",
              "--output",
              type=str,
              default=None,
              help="output var of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    def sparksql(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.sparksql, line)
        code = cell
        status, exec_id, task_id = self.linkis_client.execute("spark",
                                                              code,
                                                              run_type="sql")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        # status, result = self.linkis_client.download_csv(task_id, user_input.path)
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if not status or result != "Success":
                            raise Exception("Save Error, result: " + result)
                    # self.ipython_display.display(result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listjob(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.listjob, line)
        job_list = self.linkis_client.job_history()
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = job_list
        else:
            return job_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def progress(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.progress, line)
        status, result = self.linkis_client.progress(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def kill(self, line, cell="", local_ns=None):
        kill_input = parse_argstring(self.kill, line)
        #        print(kill_input.id)
        status, result = self.linkis_client.kill(kill_input.id)
        print(result)
        if status:
            print("Succeed")
        else:
            print("ERROR")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def log(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.log, line)
        status, result = self.linkis_client.log(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def status(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.status, line)
        status, result = self.linkis_client.status(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listengine(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.listengine, line)
        status, engine_list = self.linkis_client.engines()
        if not status:
            raise Exception("Http Exception")
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = engine_list
        else:
            return engine_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i",
              "--instance",
              type=str,
              default=None,
              help="Instance of Engine ")
    def enginekill(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.enginekill, line)
        status, result = self.linkis_client.engine_kill(user_input.instance)
        if status:
            print("Success")
        else:
            raise Exception("HTTPException")

    # NOTE: IPython looks for load_ipython_extension at module level, so this
    # should be defined outside the class for %load_ext to find it.
    def load_ipython_extension(ip):
        ip.register_magics(LinkisMagic)

    # Improve log display: split into a detailed log and key information.
    def log_detail(self):
        pass

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    def flashcookies(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.flashcookies, line)
        self.linkis_client.refresh_cookies()
        self.ipython_display.display("Refresh Cookies Successful.")
Code example #3
class SparkMagicBase(Magics):

    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [_STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug(u'Initialized spark magics.')

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

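    # Send a local variable (a str or a pandas DataFrame) to the Spark session
    # under output_variable_name, which defaults to the input name.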
    def do_send_to_spark(self, cell, input_variable_name, var_type, output_variable_name, max_rows, session_name):
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            output_variable_name = input_variable_name

        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name, input_variable_value, output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name, input_variable_value, output_variable_name, max_rows)
        else:
            raise BadUserDataException(u'Invalid or incorrect -t type. Available are: [{}]'.format(u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result, mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(u'Successfully passed \'{}\' as \'{}\' to Spark'
                                       u' kernel'.format(input_variable_name, output_variable_name))

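    # Run the cell in the given session; on failure, optionally clean up the
    # session and raise SparkStatementException rather than printing the error.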
    def execute_spark(self, cell, output_var, samplemethod, maxrows, samplefraction, session_name, coerce):
        (success, out, mimetype) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()

            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    self.ipython_display.html(out)
                else:
                    self.ipython_display.write(out)
            else:
                self.ipython_display.display(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command, session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce):
        return SparkStoreCommand(output_var, samplemethod, maxrows, samplefraction, coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction, coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction, coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
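
A minimal sketch of a line magic wrapping do_send_to_spark from this variant. The class name, the %send_df magic name, and the hard-coded variable name "local_df" are assumptions for illustration:

from IPython.core.magic import magics_class, line_magic

@magics_class
class SendToSparkMagic(SparkMagicBase):
    @line_magic
    def send_df(self, line):
        # Push the local pandas DataFrame `local_df` into the session under
        # the same name, sampled to at most 2500 rows.
        self.do_send_to_spark("", "local_df", "df", None, 2500, None)

def load_ipython_extension(ip):
    ip.register_magics(SendToSparkMagic)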