class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            # Initialize sparkmagic.magics and the Livy session at kernel startup
            self._load_spark_magics_extension()
            self._init_livy_session()
            if conf.use_auto_viz():
                self._register_auto_viz()
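For reference, a concrete kernel typically subclasses this base and supplies the language metadata; a minimal sketch (the PySpark values mirror upstream sparkmagic, but treat the exact metadata as illustrative):

class PySparkKernel(SparkKernelBase):
    def __init__(self, **kwargs):
        implementation = 'PySpark'
        implementation_version = '1.0'
        language = 'no-op'
        language_version = '0.1'
        language_info = {
            'name': 'pyspark',
            'mimetype': 'text/x-python',
            'codemirror_mode': {'name': 'python', 'version': 3},
            'pygments_lexer': 'python3'
        }
        session_language = 'python'
        super(PySparkKernel, self).__init__(
            implementation, implementation_version, language,
            language_version, language_info, session_language, **kwargs)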
Example 2
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows,
                             samplefraction):
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
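A rough sketch of how execute_sqlquery might be driven from a %%sql-style cell magic; all argument values below are illustrative (upstream sparkmagic parses them from the magic's arguments):

magics = SparkMagicBase(get_ipython())
magics.execute_sqlquery(
    cell="SELECT * FROM logs LIMIT 10",
    samplemethod=None,      # None falls back to the configured default
    maxrows=None,
    samplefraction=None,
    session="my-session",
    output_var="logs_df",   # also bind the resulting dataframe in user_ns
    quiet=False)            # False: return (and display) the dataframe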
def _setup():
    global client_manager, controller, ipython_display

    client_manager = MagicMock()
    ipython_display = MagicMock()
    spark_events = MagicMock()
    controller = SparkController(ipython_display)
    controller.session_manager = client_manager
    controller.spark_events = spark_events
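This module-level fixture suggests nose-style tests; a sketch of one such test, assuming SparkController delegates get_session_id_for_client to its session manager as in upstream sparkmagic:

from nose.tools import with_setup

@with_setup(_setup)
def test_controller_delegates_to_session_manager():
    # session_manager is a MagicMock, so its methods can be stubbed freely
    client_manager.get_session_id_for_client.return_value = 42
    assert controller.get_session_id_for_client("some_session") == 42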
Example 6
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = u"""<table>
<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>Current session?</th></tr>""" + \
                u"".join([SparkMagicBase._session_row_html(session, current_session_id) for session in info_sessions]) + \
                u"</table>"
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')

    @staticmethod
    def _session_row_html(session, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>""".format(
            session.id, session.get_app_id(), session.kind, session.status,
            SparkMagicBase._link(u'Link', session.get_spark_ui_url()),
            SparkMagicBase._link(u'Link', session.get_driver_log_url()),
            u"" if current_session_id is None
            or current_session_id != session.id else u"✔")

    @staticmethod
    def _link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(
                text, url)
        else:
            return u""
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
Example 10
class SparkMagicBase(Magics):

    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug(u'Initialized spark magics.')

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            output_variable_name = input_variable_name

        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               input_variable_value,
                                               output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 input_variable_value,
                                                 output_variable_name,
                                                 max_rows)
        else:
            raise BadUserDataException(
                u'Invalid or incorrect -t type. Available are: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result,
         mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, output_variable_name))

    def execute_spark(self,
                      cell,
                      output_var,
                      samplemethod,
                      maxrows,
                      samplefraction,
                      session_name,
                      coerce,
                      output_handler=None):
        output_handler = output_handler or SparkOutputHandler(
            html=self.ipython_display.html,
            text=self.ipython_display.write,
            default=self.ipython_display.display)

        (success, out,
         mimetype) = self.spark_controller.run_command(Command(cell),
                                                       session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()

            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    output_handler.html(out)
                else:
                    output_handler.text(out)
            else:
                output_handler.default(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
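Because execute_spark accepts an output_handler, a caller can redirect output instead of rendering it; a sketch (the capture-to-list wiring is illustrative, the html/text/default keywords match the construction above):

captured = []
silent_handler = SparkOutputHandler(html=captured.append,
                                    text=captured.append,
                                    default=captured.append)
# magics.execute_spark(cell, output_var=None, samplemethod=None, maxrows=None,
#                      samplefraction=None, session_name="my-session",
#                      coerce=False, output_handler=silent_handler)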
Example 11
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):

        if "lagom as" in cell:
            self.ipython_display.send_error(
                "You are not allowed to do the following: 'import maggy.experiment.lagom as ...'. Please, just use 'import maggy.experiment as experiment' (or something else)"
            )
            raise
        elif ".lagom" in cell:
            client = Client(self.spark_controller, self.session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            finally:
                # Stop the heartbeat thread before leaving the current scope
                client.stop()
                try:
                    client.close()
                except Exception:
                    if DEBUG:
                        print("Socket already closed by maggy server.")
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
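In summary, the maggy variant above routes a cell three ways (inputs illustrative):

# "import maggy.experiment.lagom as lg"  -> rejected with an error message
# "experiment.lagom(train_fn, config)"   -> heartbeat Client wraps execute_final
# any other cell                         -> plain execute_final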
Example 12
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def _get_session_name_by_session(self, session):
        session_name = self.spark_controller.session_manager.get_session_name_by_id(
            session.id)
        # If the session is not yet registered, activate it and add it to the session list
        if not session_name:
            session_name = session.session_name
            if session_name:
                self.spark_controller.session_manager.add_session(
                    session_name, session)
                session.already_start()
                return session_name
        else:
            return session_name

        return None

    def init_livy_session(self, language="python"):
        '''
            Automatically initialize the Livy session when executing SQL.
        :return: the active session name
        '''
        return self.__get_or_create_session(language)

    def __get_or_create_session(self, language):
        proxy_user = getpass.getuser()

        self.session_language = language
        endpoint = build_endpoint(self.session_language)
        kernel_instance_id = id(self.shell.kernel)
        session_name_selected = self.spark_controller.generate_livy_session_name(
            kernel_instance_id)

        properties = conf.get_session_properties(self.session_language)
        properties["proxyUser"] = proxy_user
        properties["session_language"] = self.session_language
        properties["session_name"] = session_name_selected

        session_info_list = self.spark_controller.get_all_sessions_endpoint(
            endpoint)
        for session in session_info_list:
            # The session kind must match
            if session.kind != properties['kind']:
                continue

            # Distinguish pyspark from pyspark3
            if session.session_language != properties['session_language']:
                continue

            session_name = self._get_session_name_by_session(session)
            if session_name == session_name_selected:
                if session.status in constants.HEALTHY_SESSION_STATUS:
                    return session_name_selected
                elif session.status in constants.FINAL_STATEMENT_STATUS:
                    # Session is in a final state; recreate it
                    self.spark_controller.add_session(session_name_selected,
                                                      endpoint, False,
                                                      properties)
                    return session_name_selected
        else:
            # No matching session in Livy (the loop finished without an
            # early return), so create a new one
            self.spark_controller.add_session(session_name_selected, endpoint,
                                              False, properties)
            return session_name_selected

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            if isinstance(out, string_types):
                self.ipython_display.write(out)
            elif isinstance(out, dict):
                df = convert_data_struct_to_dataframe(out)
                html = df.fillna('NULL').astype(str).to_html(notebook=True)
                self.ipython_display.html(html)
            else:
                self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
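A minimal sketch of driving the session bootstrap above from an interactive shell (the wiring is illustrative; inside the kernel this path is reached via the _do_not_call_init_livy_session magic shown later):

magics = SparkMagicBase(get_ipython())
session_name = magics.init_livy_session(language="python")
# Reuses a healthy Livy session with a matching kind/language/name,
# or creates a new one, and returns its name.
df = magics.execute_sqlquery("SELECT 1", None, None, None,
                             session_name, output_var=None,
                             quiet=False, coerce=False)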
Example 13
class SparkMagicBase(Magics):

    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug(u'Initialized spark magics.')

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            output_variable_name = input_variable_name

        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               input_variable_value,
                                               output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 input_variable_value,
                                                 output_variable_name,
                                                 max_rows)
        else:
            raise BadUserDataException(
                u'Invalid or incorrect -t type. Available are: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result,
         mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, output_variable_name))

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success, out,
         mimetype) = self.spark_controller.run_command(Command(cell),
                                                       session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()

            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    self.ipython_display.html(out)
                else:
                    self.ipython_display.write(out)
            else:
                self.ipython_display.display(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):

        if "lagom as" in cell:
            self.ipython_display.send_error(
                "You are not allowed to do the following: 'import maggy.experiment.lagom as ...'. Please, just use 'import maggy.experiment as experiment' (or something else)"
            )
            raise
        elif ".lagom" in cell:
            client = Client(self.spark_controller, self.session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            finally:
                # Stop the heartbeat thread before leaving the current scope
                client.stop()
                try:
                    client.close()
                except Exception:
                    if DEBUG:
                        print("Socket already closed by maggy server.")
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            # Initialize sparkmagic.magics and the Livy session at kernel startup
            self._load_spark_magics_extension()
            self._init_livy_session()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def _is_sql_filter(self, code):
        if conf.is_sql_restrict():
            if re.search(r'\s*show\s+databases', code.lower()):
                return True

            if re.search(r'\s*use\s+', code.lower()):
                return True

        return False
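    # Illustrative behavior when conf.is_sql_restrict() is enabled:
    #   _is_sql_filter("SHOW DATABASES")   -> True  (blocked)
    #   _is_sql_filter("use analytics_db") -> True  (blocked)
    #   _is_sql_filter("show tables")      -> False (allowed)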

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        def f(self):
            if self._is_sql_filter(code):
                self.ipython_display.write("已为您选择好专属数据库, 直接使用show tables 试试看")
                return self._complete_cell()

            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _load_spark_magics_extension(self):
        '''
            Load sparkmagic.magics, equivalent to running %load_ext sparkmagic.magics
        :return:
        '''
        register_spark_magics_code = "%load_ext sparkmagic.magics"
        self._execute_cell(register_spark_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark Magics library.")
        self.logger.debug("Loaded sparkmagic.magics")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _init_livy_session(self):
        '''
            Session initialization should not be performed directly in this
            class; it is delegated to the kernel magics.
        :return:
        '''
        register_magics_code = "%%_do_not_call_init_livy_session -i {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to init livy session: {}.".format(self.session_language))
        self.logger.debug("Init livy session.")

    def _register_auto_viz(self):
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)

        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()

    @gen.coroutine
    def complete_request(self, stream, ident, parent):
        content = parent['content']
        code = content['code']
        cursor_pos = content['cursor_pos']

        matches = yield gen.maybe_future(self.do_complete(code, cursor_pos))
        matches = json_clean(matches)
        completion_msg = self.session.send(stream, 'complete_reply',
                                           matches, parent, ident)

    def _experimental_do_complete(self, code, cursor_pos):
        """
        Experimental completions from IPython, using livy completion.
        """

        code = code.strip()
        if cursor_pos is None:
            cursor_pos = len(code)

        matches = []
        with provisionalcompleter():
            session_name = self.spark_controller.generate_livy_session_name(id(self))

            endpoint = build_endpoint(self.session_language)
            session_info_list = self.spark_controller.get_all_sessions_endpoint(endpoint)
            session_id = None
            for session in session_info_list:
                if session.session_name == session_name:
                    session_id = session.id

            if session_id:
                # Only complete the cursor_line
                cursor_line, cursor_column = position_to_cursor(code, cursor_pos)
                lines = code.split("\n")
                completion_line = lines[cursor_line]
                before_lines = lines[:cursor_line]
                if len(lines) > 1 and cursor_line > 0:
                    real_cursor_pos = cursor_pos - len("\n".join(before_lines)) - 1
                else:
                    real_cursor_pos = cursor_pos

                http_client = self.spark_controller._http_client(endpoint)
                kind = conf.get_livy_kind(self.session_language)
                res_completions = http_client.post_completion(session_id, kind, completion_line, real_cursor_pos)
                matches = res_completions.get("candidates", [])

        if matches:
            s = self.__get_cursor_start(code, cursor_pos, matches[0])
        else:
            s = cursor_pos

        res = {
            'matches': matches,
            'cursor_end': cursor_pos,
            'cursor_start': s,
            'metadata': {},
            'status': 'ok'
        }
        return res

    def __get_cursor_start(self, code, cursor_pos, match):
        before_code = code[:cursor_pos]
        before_code_rev = before_code[::-1]
        bucket = []
        for c in before_code_rev:
            if len(bucket) >= len(match):
                break

            if re.match(r"\w", c):
                bucket.insert(0, c)
            else:
                break

            if c == match[0]:
                bucket_len = len(bucket)
                completion_match_prefix = "".join(bucket)
                if completion_match_prefix == match[:bucket_len]:
                    return cursor_pos - bucket_len

        return cursor_pos
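    # Worked example of the backwards prefix scan above (values illustrative):
    #   code = "sc.paral", cursor_pos = 8, match = "parallelize"
    #   Scanning right-to-left accumulates "paral"; when the scan reaches 'p'
    #   (match[0]) the bucket equals match[:5], so cursor_start = 8 - 5 = 3,
    #   i.e. the completion replaces everything after "sc.".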

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        from sparkmagic.messages_api.apply_request import ApplyRequestHandler
        result_buf = []
        reply_content = ApplyRequestHandler(self).dispath_request(content)
        return reply_content, result_buf