コード例 #1
0
def test_scala_code():
    """A Scala cell with no leading magic comes back prefixed with ``%%spark``."""
    code_parser = UserCodeParser()
    scala_cell = u"\n".join([
        u"/* Place the cursor in the cell and press SHIFT + ENTER to run */",
        u"",
        u'val fruits = sc.textFile("wasb:///example/data/fruits.txt")',
        u'val yellowThings = sc.textFile("wasb:///example/data/yellowthings.txt")',
    ])

    expected = u"%%spark\n{}".format(scala_cell)
    actual = code_parser.get_code_to_run(scala_cell)
    assert_equals(expected, actual)
コード例 #2
0
def test_other_line_magic():
    """A cell starting with a line magic (``%magic``) is returned unchanged."""
    code_parser = UserCodeParser()
    magic_cell = u"%magic\nhi\nhi\nhi"

    actual = code_parser.get_code_to_run(magic_cell)
    assert_equals(magic_cell, actual)
コード例 #3
0
def test_other_cell_magic():
    """A cell starting with a cell magic (``%%magic``) is returned unchanged."""
    code_parser = UserCodeParser()
    magic_cell = u"%%magic\nhi\nhi\nhi"

    expected = u"{}".format(magic_cell)
    actual = code_parser.get_code_to_run(magic_cell)
    assert_equals(expected, actual)
コード例 #4
0
def test_our_line_magics_with_content():
    """A kernel line magic followed by content becomes a cell magic.

    The expected output keeps the content lines and ends with a newline and
    one space.
    """
    code_parser = UserCodeParser()
    info_magic = KernelMagics.info.__name__
    source_cell = u"%{}\nmy content\nmore content".format(info_magic)

    expected = u"%%{}\nmy content\nmore content\n ".format(info_magic)
    actual = code_parser.get_code_to_run(source_cell)
    assert_equals(expected, actual)
コード例 #5
0
def test_other_cell_magic():
    """The parser leaves a cell headed by ``%%magic`` untouched."""
    code_parser = UserCodeParser()
    body_lines = [u"%%magic", u"hi", u"hi", u"hi"]
    magic_cell = u"\n".join(body_lines)

    expected = u"{}".format(magic_cell)
    assert_equals(expected, code_parser.get_code_to_run(magic_cell))
コード例 #6
0
def test_other_line_magic():
    """The parser leaves a cell headed by ``%magic`` untouched."""
    code_parser = UserCodeParser()
    body_lines = [u"%magic", u"hi", u"hi", u"hi"]
    magic_cell = u"\n".join(body_lines)

    assert_equals(magic_cell, code_parser.get_code_to_run(magic_cell))
コード例 #7
0
def test_local_double():
    """A ``%%local`` cell is reduced to its body, without the magic line."""
    code_parser = UserCodeParser()
    local_cell = u"%%local\nhi\nhi\nhi"

    actual = code_parser.get_code_to_run(local_cell)
    assert_equals(u"hi\nhi\nhi", actual)
コード例 #8
0
def test_scala_code():
    """Plain Scala source is expected back with a ``%%spark`` line prepended."""
    code_parser = UserCodeParser()
    scala_cell = (
        u"/* Place the cursor in the cell and press SHIFT + ENTER to run */\n"
        u"\n"
        u"val fruits = sc.textFile(\"wasb:///example/data/fruits.txt\")\n"
        u"val yellowThings = sc.textFile(\"wasb:///example/data/yellowthings.txt\")"
    )

    expected = u"%%spark\n{}".format(scala_cell)
    assert_equals(expected, code_parser.get_code_to_run(scala_cell))
コード例 #9
0
def test_local_double():
    """The ``%%local`` header is stripped and only the cell body is returned."""
    code_parser = UserCodeParser()
    body = u"hi\nhi\nhi"
    local_cell = u"%%local\n" + body

    assert_equals(u"hi\nhi\nhi", code_parser.get_code_to_run(local_cell))
コード例 #10
0
def test_unicode_in_magics():
    """Non-ASCII content survives the line-magic to cell-magic rewrite."""
    code_parser = UserCodeParser()
    info_magic = KernelMagics.info.__name__
    source_cell = u"%{}\nmy content è🐙\nmore content".format(info_magic)

    expected = u"%%{}\nmy content è🐙\nmore content\n ".format(info_magic)
    actual = code_parser.get_code_to_run(source_cell)
    assert_equals(expected, actual)
コード例 #11
0
def test_unicode_in_magics():
    """A kernel line magic with accented/emoji content is rewritten intact.

    The expected result is the cell-magic form of the same magic, the same
    content lines, and a trailing newline plus one space.
    """
    code_parser = UserCodeParser()
    info_magic = KernelMagics.info.__name__
    source_cell = u"%{}\nmy content è🐙\nmore content".format(info_magic)

    expected = u"%%{}\nmy content è🐙\nmore content\n ".format(info_magic)
    assert_equals(expected, code_parser.get_code_to_run(source_cell))
コード例 #12
0
def test_our_line_magics_with_content():
    """A kernel line magic plus trailing lines is rewritten as a cell magic."""
    code_parser = UserCodeParser()
    info_magic = KernelMagics.info.__name__
    source_cell = u"%{}\nmy content\nmore content".format(info_magic)

    expected = u"%%{}\nmy content\nmore content\n ".format(info_magic)
    assert_equals(expected, code_parser.get_code_to_run(source_cell))
コード例 #13
0
class SparkKernelBase(IPythonKernel):
    """IPython kernel base class that runs user cells through sparkmagic.

    User code is rewritten by ``user_code_parser`` and then executed by the
    parent ``IPythonKernel``. Unless ``testing=True`` is passed in ``kwargs``,
    construction also loads the sparkmagic extensions, sets the session
    language, asks the kernel magics to initialise a Livy session and, when
    enabled in the configuration, registers the auto-viz DataFrame formatter.
    """

    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        """Set the kernel metadata, build the helpers and run start-up magics.

        :param implementation: stored on ``self.implementation``.
        :param implementation_version: stored on ``self.implementation_version``.
        :param language: stored on ``self.language``.
        :param language_version: stored on ``self.language_version``.
        :param language_info: stored on ``self.language_info``.
        :param session_language: language of the Spark session; used for the
            logger name and passed to the start-up magics.
        :param user_code_parser: object exposing ``get_code_to_run(code)``;
            a ``UserCodeParser`` is created when omitted.
        :param kwargs: forwarded to ``IPythonKernel.__init__``; a truthy
            ``testing`` entry skips every start-up cell.
        """
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            # Initialise sparkmagic.magics and the session at kernel start-up.
            self._load_spark_magics_extension()
            self._init_livy_session()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def _is_sql_filter(self, code):
        """Return True when ``code`` must be blocked by the SQL restriction.

        Only active when ``conf.is_sql_restrict()`` is truthy. A cell is
        blocked when it contains ``show databases`` or a ``use <something>``
        statement (case-insensitive).
        """
        if not conf.is_sql_restrict():
            return False

        lowered = code.lower()

        # The leading \b keeps the keywords from matching as the tail of a
        # longer word ("house ", "because ", "pause "), which previously
        # blocked unrelated cells.
        if re.search(r'\bshow\s+databases', lowered):
            return True

        if re.search(r'\buse\s+', lowered):
            return True

        return False

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute a user cell, honouring the SQL filter and any queued fatal error.

        The work runs inside ``wrap_unexpected_exceptions`` with
        ``self._complete_cell`` supplied as the error fallback.
        """
        def f(self):
            if self._is_sql_filter(code):
                self.ipython_display.write("已为您选择好专属数据库, 直接使用show tables 试试看")
                return self._complete_cell()

            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        """Delete the session, then delegate shutdown to the parent kernel."""
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        """Rewrite ``code`` with the user code parser and execute the result."""
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        """Run ``%load_ext sparkmagic.kernels``; a failure becomes a fatal error."""
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _load_spark_magics_extension(self):
        """Initialise the Spark magics, like running ``%load_ext sparkmagic.magics``.

        A failure becomes a fatal error.
        """
        register_spark_magics_code = "%load_ext sparkmagic.magics"
        self._execute_cell(register_spark_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark Magics library.")
        self.logger.debug("Loaded sparkmagic.magics")

    def _change_language(self):
        """Run the internal change-language cell magic for ``session_language``."""
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _init_livy_session(self):
        """Ask the kernel magics to initialise the Livy session.

        Session initialisation should not be carried out concretely in this
        class; it is delegated to the kernel magics through an internal cell
        magic.
        """
        register_magics_code = "%%_do_not_call_init_livy_session -i {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to init livy session: {}.".format(self.session_language))
        self.logger.debug("Init livy session.")

    def _register_auto_viz(self):
        """Register ``display_dataframe`` as the display formatter for pandas DataFrames.

        Also overrides the autovizwidget ``events_handler`` configuration
        entry with the Spark events handler.
        """
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)

        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        """Run the internal delete-session cell magic silently, without storing history."""
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        """Execute ``code`` and optionally turn an error reply into a fatal error.

        :param shutdown_if_error: when True and the reply status is
            ``"error"``, the failure is escalated through
            ``_abort_with_fatal_error``.
        :param log_if_error: message prefix for that fatal error. When it is
            None no escalation happens and the raw reply is returned.
        :return: the reply of the executed cell, or the reply produced by
            ``_abort_with_fatal_error`` when the error was escalated.
        """
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute ``code`` with the parent kernel's ``do_execute``."""
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        """Call the parent kernel's ``do_shutdown``."""
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        """Log ``message`` as an error and send it to the notebook display."""
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()

    @gen.coroutine
    def complete_request(self, stream, ident, parent):
        """Answer a completion request with the result of ``do_complete``.

        Reads ``code`` and ``cursor_pos`` from ``parent['content']`` and sends
        the JSON-cleaned matches back as a ``complete_reply`` message.
        """
        content = parent['content']
        code = content['code']
        cursor_pos = content['cursor_pos']

        matches = yield gen.maybe_future(self.do_complete(code, cursor_pos))
        matches = json_clean(matches)
        self.session.send(stream, 'complete_reply',
                          matches, parent, ident)

    def _experimental_do_complete(self, code, cursor_pos):
        """
        Experimental completions from IPython, using livy completion.

        Finds the Livy session whose name matches the one generated for this
        kernel, posts the line under the cursor to its completion endpoint and
        returns a completion reply dict with the ``candidates`` it answers.
        When no matching session exists the reply carries no matches.
        """

        # NOTE(review): cursor_pos is not adjusted for the stripped prefix;
        # callers presumably pass code without leading whitespace - confirm.
        code = code.strip()
        if cursor_pos is None:
            cursor_pos = len(code)

        matches = []
        with provisionalcompleter():
            session_name = self.spark_controller.generate_livy_session_name(id(self))

            endpoint = build_endpoint(self.session_language)
            session_info_list = self.spark_controller.get_all_sessions_endpoint(endpoint)
            session_id = None
            for session in session_info_list:
                if session.session_name == session_name:
                    session_id = session.id

            # Compare against None explicitly: a session id of 0 is falsy and
            # would otherwise be treated as "no session found".
            if session_id is not None:
                # Only complete the cursor_line
                cursor_line, cursor_column = position_to_cursor(code, cursor_pos)
                lines = code.split("\n")
                completion_line = lines[cursor_line]
                before_lines = lines[:cursor_line]
                if len(lines) > 1 and cursor_line > 0:
                    # Offset inside the cursor line: drop the preceding lines
                    # and the newline that ends them.
                    real_cursor_pos = cursor_pos - len("\n".join(before_lines)) - 1
                else:
                    real_cursor_pos = cursor_pos

                http_client = self.spark_controller._http_client(endpoint)
                kind = conf.get_livy_kind(self.session_language)
                res_completions = http_client.post_completion(session_id, kind, completion_line, real_cursor_pos)
                matches = res_completions.get("candidates", [])

        if matches:
            s = self.__get_cursor_start(code, cursor_pos, matches[0])
        else:
            s = cursor_pos

        res = {
            'matches': matches,
            'cursor_end': cursor_pos,
            'cursor_start': s,
            'metadata': {},
            'status': 'ok'
        }
        return res

    def __get_cursor_start(self, code, cursor_pos, match):
        """Return the index where the text that ``match`` completes begins.

        Walks backwards from ``cursor_pos`` over word characters, collecting at
        most ``len(match)`` of them. It returns the start of the first
        collected suffix that is a prefix of ``match``, or ``cursor_pos`` when
        there is none.
        """
        before_code = code[:cursor_pos]
        before_code_rev = before_code[::-1]
        bucket = []
        for c in before_code_rev:
            if len(bucket) >= len(match):
                break

            if re.match(r"\w", c):
                bucket.insert(0, c)
            else:
                break

            if c == match[0]:
                bucket_len = len(bucket)
                completion_match_prefix = "".join(bucket)
                if completion_match_prefix == match[:bucket_len]:
                    return cursor_pos - bucket_len

        return cursor_pos

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        """Dispatch an apply request to ``ApplyRequestHandler``.

        :return: ``(reply_content, result_buf)`` where ``result_buf`` is
            always an empty list.
        """
        from sparkmagic.messages_api.apply_request import ApplyRequestHandler
        result_buf = []
        # NOTE: ``dispath_request`` (sic) is the handler method name as called here.
        reply_content = ApplyRequestHandler(self).dispath_request(content)
        return reply_content, result_buf
コード例 #14
0
def test_unicode():
    """A cell containing non-ASCII characters is still prefixed with ``%%spark``."""
    code_parser = UserCodeParser()
    unicode_cell = u"print 'è🐙🐙🐙🐙'"

    expected = u"%%spark\n{}".format(unicode_cell)
    actual = code_parser.get_code_to_run(unicode_cell)
    assert_equals(expected, actual)
コード例 #15
0
def test_spark_code():
    """Plain multi-line code comes back with a ``%%spark`` line prepended."""
    code_parser = UserCodeParser()
    plain_cell = u"\n".join([u"my code", u"and more"])

    actual = code_parser.get_code_to_run(plain_cell)
    assert_equals(u"%%spark\nmy code\nand more", actual)
コード例 #16
0
def test_empty_string():
    """An empty cell is returned as an empty string."""
    code_parser = UserCodeParser()

    actual = code_parser.get_code_to_run(u"")
    assert_equals(u"", actual)
コード例 #17
0
def test_our_line_magics():
    """A bare kernel line magic becomes a cell magic plus a newline and a space."""
    code_parser = UserCodeParser()
    info_magic = KernelMagics.info.__name__
    line_magic_cell = u"%{}".format(info_magic)

    expected = u"%%{}\n ".format(info_magic)
    actual = code_parser.get_code_to_run(line_magic_cell)
    assert_equals(expected, actual)
コード例 #18
0
def test_empty_string():
    """Parsing an empty cell yields an empty string."""
    code_parser = UserCodeParser()
    empty_cell = u""

    assert_equals(u"", code_parser.get_code_to_run(empty_cell))
コード例 #19
0
def test_our_line_magics():
    """The ``%info`` style line magic is rewritten to its ``%%`` cell form.

    The expected result ends with a newline followed by a single space.
    """
    code_parser = UserCodeParser()
    info_magic = KernelMagics.info.__name__
    line_magic_cell = u"%{}".format(info_magic)

    expected = u"%%{}\n ".format(info_magic)
    assert_equals(expected, code_parser.get_code_to_run(line_magic_cell))
コード例 #20
0
def test_unicode():
    """Accented and emoji characters pass through the ``%%spark`` rewrite unchanged."""
    code_parser = UserCodeParser()
    unicode_cell = u"print 'è🐙🐙🐙🐙'"

    expected = u"%%spark\n{}".format(unicode_cell)
    assert_equals(expected, code_parser.get_code_to_run(unicode_cell))
コード例 #21
0
def test_spark_code():
    """Code without any magic is routed to ``%%spark``."""
    code_parser = UserCodeParser()
    plain_cell = u"my code" + u"\n" + u"and more"

    expected = u"%%spark\nmy code\nand more"
    assert_equals(expected, code_parser.get_code_to_run(plain_cell))
コード例 #22
0
class SparkKernelBase(IPythonKernel):
    """IPython kernel base class that runs user cells through sparkmagic.

    Every cell is rewritten by ``user_code_parser`` before the parent
    ``IPythonKernel`` executes it. Start-up cells (magics extension, language
    change, optional auto-viz) run from ``__init__`` unless ``testing=True``
    is passed in ``kwargs``.
    """

    def __init__(self, implementation, implementation_version, language, language_version,
                 language_info, session_language, user_code_parser=None, **kwargs):
        """Store the kernel metadata, build the helpers and run the start-up cells.

        ``kwargs`` is forwarded to ``IPythonKernel.__init__``; a truthy
        ``testing`` entry skips the start-up cells. ``user_code_parser``
        defaults to a new ``UserCodeParser``.
        """
        # Attributes Jupyter requires the kernel to override.
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # sparkmagic-specific override.
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        logger_name = u"{}_jupyter_kernel".format(self.session_language)
        self.logger = SparkLog(logger_name)
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        self.user_code_parser = UserCodeParser() if user_code_parser is None else user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if kwargs.get("testing", False):
            return

        self._load_magics_extension()
        self._change_language()
        if conf.use_auto_viz():
            self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute a user cell, replaying a queued fatal error instead if one exists.

        The work runs inside ``wrap_unexpected_exceptions`` with
        ``self._complete_cell`` supplied as the error fallback.
        """
        def f(self):
            if self._fatal_error is None:
                return self._do_execute(code, silent, store_history,
                                        user_expressions, allow_stdin)

            return self._repeat_fatal_error()

        guarded = wrap_unexpected_exceptions(f, self._complete_cell)
        return guarded(self)

    def do_shutdown(self, restart):
        """Delegate shutdown to the parent kernel."""
        # Cleanup through self._delete_session() is currently disabled here.
        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        """Rewrite ``code`` with the user code parser and execute the result."""
        rewritten = self.user_code_parser.get_code_to_run(code)
        return self._execute_cell(rewritten, silent, store_history,
                                  user_expressions, allow_stdin)

    def _load_magics_extension(self):
        """Run ``%load_ext sparkmagic.kernels``; a failure becomes a fatal error."""
        self._execute_cell("%load_ext sparkmagic.kernels", True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        """Run the internal change-language cell magic for ``session_language``."""
        magic_cell = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        failure_text = "Failed to change language to {}.".format(self.session_language)
        self._execute_cell(magic_cell, True, False,
                           shutdown_if_error=True, log_if_error=failure_text)
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        """Register ``display_dataframe`` as the display formatter for pandas DataFrames.

        Also overrides the autovizwidget ``events_handler`` configuration
        entry with the Spark events handler.
        """
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as autoviz_conf

        autoviz_conf.override("events_handler", get_spark_events_handler())

        setup_cell = "\n".join([
            "from autovizwidget.widget.utils import display_dataframe",
            "ip = get_ipython()",
            "ip.display_formatter.ipython_display_formatter.for_type_by_name("
            "'pandas.core.frame', 'DataFrame', display_dataframe)",
        ])
        self._execute_cell(setup_cell, True, False,
                           shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        """Run the internal delete-session cell magic silently, without storing history."""
        self._execute_cell_for_user("%%_do_not_call_delete_session\n ", True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None,
                      allow_stdin=False, shutdown_if_error=False, log_if_error=None):
        """Execute ``code`` and optionally escalate an error reply to a fatal error.

        Escalation happens only when ``shutdown_if_error`` is true, the reply
        status is ``"error"`` and ``log_if_error`` is not None; otherwise the
        raw reply is returned.
        """
        reply = self._execute_cell_for_user(code, silent, store_history,
                                            user_expressions, allow_stdin)

        if not (shutdown_if_error and reply[u"status"] == u"error"):
            return reply

        reply_error = reply[u"evalue"]
        if log_if_error is None:
            return reply

        fatal_text = "{}\nException details:\n\t\"{}\"".format(log_if_error, reply_error)
        return self._abort_with_fatal_error(fatal_text)

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None,
                               allow_stdin=False):
        """Execute ``code`` with the parent kernel's ``do_execute``."""
        parent = super(SparkKernelBase, self)
        return parent.do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        """Call the parent kernel's ``do_shutdown``."""
        parent = super(SparkKernelBase, self)
        return parent.do_shutdown(restart)

    def _complete_cell(self):
        """Run a no-op cell (``None``) and return its reply.

        Return this value when something prevents the user's cell from
        executing, so the Jupyter UI still registers the cell as completed.
        """
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        """Log ``message`` as an error and send it to the notebook display."""
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Remember ``message`` as the fatal error to report on the next cell.

        Nothing is raised here. This is used for start-up failures, since
        IPython crashes if an exception is thrown from ``__init__``.
        """
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queue ``message`` as the fatal error and report it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Log and display the queued fatal error, then complete the cell."""
        suggestion = conf.fatal_error_suggestion()
        error_text = suggestion.format(self._fatal_error)
        self.logger.error(error_text)
        self.ipython_display.send_error(error_text)
        return self._complete_cell()
コード例 #23
0
class SparkKernelBase(IPythonKernel):
    """IPython kernel base class that runs user cells through sparkmagic.

    Cells are passed through ``user_code_parser`` and then executed by the
    parent ``IPythonKernel``. The session is deleted on shutdown.
    """

    def __init__(
        self,
        implementation,
        implementation_version,
        language,
        language_version,
        language_info,
        session_language,
        user_code_parser=None,
        **kwargs
    ):
        """Store the kernel metadata, build the helpers and run the start-up cells.

        ``kwargs`` goes to ``IPythonKernel.__init__``. When it holds a truthy
        ``testing`` entry no start-up cell is executed. A ``UserCodeParser``
        is created when ``user_code_parser`` is None.
        """
        # Values Jupyter expects the kernel to override.
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # sparkmagic-specific override.
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(
            u"{}_jupyter_kernel".format(self.session_language)
        )
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is not None:
            self.user_code_parser = user_code_parser
        else:
            self.user_code_parser = UserCodeParser()

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        testing = kwargs.get("testing", False)
        if not testing:
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute a user cell, or replay the queued fatal error when there is one.

        The call is wrapped by ``wrap_unexpected_exceptions``, with
        ``self._complete_cell`` given as the error fallback.
        """
        def f(self):
            if self._fatal_error is None:
                return self._do_execute(
                    code, silent, store_history, user_expressions, allow_stdin
                )
            return self._repeat_fatal_error()

        wrapped = wrap_unexpected_exceptions(f, self._complete_cell)
        return wrapped(self)

    def do_shutdown(self, restart):
        """Delete the session, then delegate shutdown to the parent kernel."""
        self._delete_session()  # cleanup before the kernel goes away
        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        """Run the parsed form of ``code`` and return the reply."""
        parsed_code = self.user_code_parser.get_code_to_run(code)
        return self._execute_cell(
            parsed_code, silent, store_history, user_expressions, allow_stdin
        )

    def _load_magics_extension(self):
        """Load the ``sparkmagic.kernels`` extension; a failure is fatal."""
        load_cell = "%load_ext sparkmagic.kernels"
        failure_text = "Failed to load the Spark kernels magics library."
        self._execute_cell(
            load_cell, True, False, shutdown_if_error=True, log_if_error=failure_text
        )
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        """Switch the session language through the internal cell magic; a failure is fatal."""
        session_language = self.session_language
        self._execute_cell(
            "%%_do_not_call_change_language -l {}\n ".format(session_language),
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to change language to {}.".format(session_language),
        )
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        """Install ``display_dataframe`` as the formatter for pandas DataFrames.

        The autovizwidget ``events_handler`` configuration entry is overridden
        with the Spark events handler first.
        """
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as viz_configuration

        events_handler = get_spark_events_handler()
        viz_configuration.override("events_handler", events_handler)

        formatter_cell = (
            "from autovizwidget.widget.utils import display_dataframe\n"
            "ip = get_ipython()\n"
            "ip.display_formatter.ipython_display_formatter.for_type_by_name("
            "'pandas.core.frame', 'DataFrame', display_dataframe)"
        )
        self._execute_cell(
            formatter_cell,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to register auto viz for notebook.",
        )
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        """Execute the internal delete-session cell magic silently, without history."""
        delete_cell = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(delete_cell, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        """Execute ``code``; optionally turn an error reply into a fatal error.

        The reply is escalated through ``_abort_with_fatal_error`` only when
        ``shutdown_if_error`` is true, its status is ``"error"`` and
        ``log_if_error`` is provided. In every other case the reply is
        returned as is.
        """
        outcome = self._execute_cell_for_user(
            code, silent, store_history, user_expressions, allow_stdin
        )

        failed = shutdown_if_error and outcome[u"status"] == u"error"
        if failed:
            details = outcome[u"evalue"]
            if log_if_error is not None:
                return self._abort_with_fatal_error(
                    "{}\nException details:\n\t\"{}\"".format(log_if_error, details)
                )

        return outcome

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Forward the cell to ``IPythonKernel.do_execute``."""
        base = super(SparkKernelBase, self)
        return base.do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        """Forward the shutdown to ``IPythonKernel.do_shutdown``."""
        base = super(SparkKernelBase, self)
        return base.do_shutdown(restart)

    def _complete_cell(self):
        """Execute a cell containing only ``None`` and return its reply.

        Use the returned value when an error keeps the user's cell from
        running; the Jupyter UI then registers the cell as completed.
        """
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        """Write ``message`` to the error log and to the notebook display."""
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Store ``message`` so it is reported when the next cell is executed.

        No error is raised at this point. Intended for start-up failures,
        because IPython crashes when ``__init__`` throws.
        """
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Store ``message`` as the fatal error and report it right away."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Report the stored fatal error to the log and display, then complete the cell."""
        template = conf.fatal_error_suggestion()
        rendered = template.format(self._fatal_error)
        self.logger.error(rendered)
        self.ipython_display.send_error(rendered)
        return self._complete_cell()