Code Example #1
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()
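
For context, concrete kernels pass the Jupyter metadata and session language down to this constructor. A minimal sketch of such a subclass; the class name, version strings, and language_info values are illustrative assumptions (sparkmagic's real wrapper kernels follow this pattern):

class PySparkKernel(SparkKernelBase):
    def __init__(self, **kwargs):
        super(PySparkKernel, self).__init__(
            implementation='PySpark',
            implementation_version='1.0',
            language='python',
            language_version='3.8',
            language_info={'name': 'pyspark', 'mimetype': 'text/x-python'},
            session_language='python',  # drives the %%_do_not_call_change_language magic
            **kwargs)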
Code Example #2
def test_stderr_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stderr = MagicMock()

    ipython_display.send_error(u'Testing Stderr Flush è')
    assert sys.stderr.flush.call_count == 1
Code Example #3
def test_stdout_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stdout = MagicMock()

    ipython_display.write(u'Testing Stdout Flush è')
    assert sys.stdout.flush.call_count == 1
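
The two flush tests above replace the real stream with a MagicMock and never restore it, which can leak into later tests. A hedged variant of the same check using unittest.mock.patch, assuming IpythonDisplay resolves sys.stdout at call time (which the original tests already rely on):

from unittest.mock import MagicMock, patch

def test_stdout_flush_restores_stream():
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = MagicMock()
    with patch('sys.stdout', new=MagicMock()) as fake_stdout:
        ipython_display.write(u'Testing Stdout Flush è')
        assert fake_stdout.flush.call_count == 1
    # sys.stdout is automatically restored when the context manager exits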
Code Example #4
File: linkismagic.py Project: zwx-master/Prophecis
    def __init__(self, shell, data=None, widget=None):
        super(LinkisMagic, self).__init__(shell)
        self.ipython_display = IpythonDisplay()
        self.data = data
        # if widget is None:
        #     widget = MagicsControllerWidget(self.spark_controller, IpyWidgetFactory(), self.ipython_display)
        # self.manage_widget = widget
        self.linkis_client = LinkisClient()
Code Example #5
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = u"""<table>
<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>Current session?</th></tr>""" + \
                u"".join([SparkMagicBase._session_row_html(session, current_session_id) for session in info_sessions]) + \
                u"</table>"
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')

    @staticmethod
    def _session_row_html(session, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>""".format(
            session.id, session.get_app_id(), session.kind, session.status,
            SparkMagicBase._link(u'Link', session.get_spark_ui_url()),
            SparkMagicBase._link(u'Link', session.get_driver_log_url()),
            u"" if current_session_id is None
            or current_session_id != session.id else u"✔")

    @staticmethod
    def _link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(
                text, url)
        else:
            return u""
Code Example #6
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()
Code Example #7
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows,
                             samplefraction):
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
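
As a usage sketch, the output_var/quiet parameters of execute_sqlquery let a calling magic bind the result, display it, or both. The magics instance and session name below are assumptions, and passing None for the sampling arguments assumes SQLQuery falls back to its configured defaults:

# bind the result to `df_out` in the notebook namespace without displaying it
magics.execute_sqlquery(cell, None, None, None, 'my_session', 'df_out', True)

# return the dataframe so the notebook renders it
df = magics.execute_sqlquery(cell, None, None, None, 'my_session', None, False)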
Code Example #8
class DataGraph(object):
    """This does not use the table version of plotly because it freezes up the browser for >60 rows. Instead, we use
    pandas df HTML representation."""
    def __init__(self, display=None):
        if display is None:
            self.display = IpythonDisplay()
        else:
            self.display = display

    def render(self, df, encoding, output):
        with output:
            max_rows = pd.get_option("display.max_rows")
            max_cols = pd.get_option("display.max_columns")
            show_dimensions = pd.get_option("display.show_dimensions")

            # This will hide the index column for pandas df.
            self.display.html("""
<style>
    table.dataframe.hideme thead th:first-child {
        display: none;
    }
    table.dataframe.hideme tbody th {
        display: none;
    }
</style>
""")
            self.display.html(df.to_html(max_rows=max_rows, max_cols=max_cols,
                                         show_dimensions=show_dimensions, notebook=True, classes="hideme"))

    @staticmethod
    def display_logarithmic_x_axis():
        return False

    @staticmethod
    def display_logarithmic_y_axis():
        return False

    @staticmethod
    def display_x():
        return False

    @staticmethod
    def display_y():
        return False
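
A minimal usage sketch for the renderer above, assuming an ipywidgets Output as the output context (which the with output: block implies); note that this renderer ignores the encoding argument:

import pandas as pd
from ipywidgets import Output

df = pd.DataFrame({'fruit': ['apple', 'pear'], 'count': [3, 5]})
out = Output()
DataGraph().render(df, encoding=None, output=out)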
Code Example #9
    def __init__(self, df, encoding, renderer=None, ipywidget_factory=None, encoding_widget=None, ipython_display=None,
                 nested_widget_mode=False, spark_events=None, testing=False, **kwargs):
        assert encoding is not None
        assert df is not None
        assert type(df) is pd.DataFrame

        kwargs['orientation'] = 'vertical'

        if not testing:
            super(AutoVizWidget, self).__init__((), **kwargs)

        self.df = self._convert_to_displayable_dataframe(df)

        if renderer is None:
            renderer = GraphRenderer()
        self.renderer = renderer

        if ipywidget_factory is None:
            ipywidget_factory = IpyWidgetFactory()
        self.ipywidget_factory = ipywidget_factory

        if encoding_widget is None:
            encoding_widget = EncodingWidget(self.df, encoding, self.on_render_viz)
        self.encoding_widget = encoding_widget

        if ipython_display is None:
            ipython_display = IpythonDisplay()
        self.ipython_display = ipython_display

        self.encoding = encoding

        # Widget that will become the only child of AutoVizWidget
        self.widget = self.ipywidget_factory.get_vbox()

        # Create output area
        self.to_display = self.ipywidget_factory.get_output()
        self.to_display.width = "800px"
        self.output = self.ipywidget_factory.get_hbox()
        self.output.children = [self.to_display]

        self.controls = self._create_controls_widget()

        if spark_events is None:
            spark_events = AutoVizEvents()
        self._spark_events = spark_events

        if nested_widget_mode:
            self.widget.children = [self.controls, self.output]
            self.children = [self.widget]
        else:
            self.ipython_display.display(self.controls)
            self.ipython_display.display(self.to_display)

        self.on_render_viz()
Code Example #10
    def __init__(self, implementation, implementation_version, language,
                 language_version, language_info, **kwargs):
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        super(HdfsKernelBase, self).__init__(**kwargs)

        self._fatal_error = None
        self.ipython_display = IpythonDisplay()
        self.session_manager = HdfsSessionManager()
Code Example #11
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
Code Example #12
    def __init__(self, spark_controller, ipywidget_factory=None, ipython_display=None,
                 nested_widget_mode=False, testing=False, **kwargs):
        kwargs['orientation'] = 'vertical'

        if not testing:
            super(AbstractMenuWidget, self).__init__((), **kwargs)

        self.spark_controller = spark_controller

        if ipywidget_factory is None:
            ipywidget_factory = IpyWidgetFactory()
        self.ipywidget_factory = ipywidget_factory

        if ipython_display is None:
            ipython_display = IpythonDisplay()
        self.ipython_display = ipython_display

        self.children = []

        if not nested_widget_mode:
            self._repr_html_()
Code Example #13
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c
        
        handler = get_spark_events_handler()
        c.override("events_handler", handler)
        
        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
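
Because the constructor gates startup work behind kwargs.get("testing", False), unit tests can build a kernel without a Livy session. A hedged sketch of that pattern; the PySparkKernel subclass and the patched base initializer are assumptions, not the project's actual test code:

from unittest.mock import patch

with patch.object(IPythonKernel, '__init__', lambda self, **kwargs: None):
    kernel = PySparkKernel(testing=True)
    # testing=True skips _load_magics_extension(), _change_language()
    # and _register_auto_viz(), so no cell is ever executed here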
Code Example #14
File: linkismagic.py Project: zwx-master/Prophecis
class LinkisMagic(Magics):
    def __init__(self, shell, data=None, widget=None):
        super(LinkisMagic, self).__init__(shell)
        self.ipython_display = IpythonDisplay()
        self.data = data
        #        if widget is None:
        #            widget = MagicsControllerWidget(self.spark_controller, IpyWidgetFactory(), self.ipython_display)
        #        self.manage_widget = widget
        self.linkis_client = LinkisClient()

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    def spark(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.spark, line)

        code = cell
        status, exec_id, task_id = self.linkis_client.execute("spark",
                                                              code,
                                                              run_type="spark")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException: get_execute_result error")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    @argument("-v",
              "--var",
              type=str,
              default=None,
              help="transport var from spark cluster to local python ")
    @argument("-u",
              "--upload",
              type=str,
              default=None,
              help="transport var from local python to spark cluster")
    def pyspark(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.pyspark, line)
        pyspark_code = cell

        if user_input.upload is not None:
            pyspark_code = self.linkis_client.pyspark_load_pickle_code(
                user_input.upload) + pyspark_code
            self.linkis_client.save_pickle_file(
                user_input.upload, self.shell.user_ns[user_input.upload])

        if user_input.var is not None:
            pyspark_code = pyspark_code + self.linkis_client.define_pickel_code(
                user_input.var)

        status, exec_id, task_id = self.linkis_client.execute(
            "spark", pyspark_code, run_type="python")
        if not status:
            raise Exception("HTTPException: execute job error")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    if user_input.var is not None:
                        self.shell.user_ns[
                            user_input.
                            var] = self.linkis_client.load_pickle_var(
                                user_input.var)
                    if user_input.upload is not None:
                        self.linkis_client.delete_upload_var(user_input.upload)
                    # self.ipython_display.display(result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException: get_execute_result error")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    def sql(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.sql, line)
        sql_code = 'if "hiveContext" not in locals().keys():\n \
        \tfrom pyspark.sql import HiveContext\n\
        \thiveContext = HiveContext(sc)\n'

        cell_list = cell.split("\n")
        for i in range(len(cell_list)):
            if "" != cell_list[i]:
                sql_code = sql_code + 'hiveContext.sql("' + cell_list[
                    i] + '").show()' + '\n'
        print(sql_code)
        status, exec_id, task_id = self.linkis_client.execute(
            "spark", sql_code, run_type="python")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id)
                        if status and result == "None":
                            raise Exception("Save Error, Result dir is None")
                        elif not status or result != "Success":
                            raise Exception("Save Error, Result: " + result)
                    # self.ipython_display.display(result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o",
              "--output",
              type=str,
              default=None,
              help="output var of Job ")
    @argument("-p",
              "--path",
              type=str,
              default=None,
              help="Download output in path ")
    @argument("-q",
              "--quiet",
              type=str,
              default=True,
              help="Do not display result on console")
    def sparksql(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.sparksql, line)
        code = cell
        status, exec_id, task_id = self.linkis_client.execute("spark",
                                                              code,
                                                              run_type="sql")
        if not status:
            raise Exception("HTTPException")
        else:
            status, exec_status, log, result = self.linkis_client.get_execute_result(
                exec_id, task_id)
            if status:
                if exec_status == "Failed":
                    print(log["keyword"])
                    print(log["log"])
                else:
                    if user_input.output is not None:
                        self.shell.user_ns[user_input.output] = result
                    if user_input.path is not None:
                        # status, result = self.linkis_client.download_csv(task_id, user_input.path)
                        status, result = self.linkis_client.download_by_pipeline_engine(
                            task_id, user_input.path)
                        if status != True or result != "Success":
                            raise Exception("Save Error, result: " + result)
                    # self.ipython_display.display(result)
                    if user_input.quiet == "False" or user_input.quiet == "false":
                        return result
            else:
                raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listjob(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.listjob, line)
        job_list = self.linkis_client.job_history()
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = job_list
        else:
            return job_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def progress(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.progress, line)
        status, result = self.linkis_client.progress(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def kill(self, line, cell="", local_ns=None):
        kill_input = parse_argstring(self.kill, line)
        #        print(kill_input.id)
        status, result = self.linkis_client.kill(kill_input.id)
        print(result)
        if status:
            print("Succeed")
        else:
            print("ERROR")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def log(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.log, line)
        status, result = self.linkis_client.log(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i", "--id", type=str, default=None, help="Exec ID of Job ")
    def status(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.status, line)
        status, result = self.linkis_client.status(user_input.id)
        if status:
            print(result)
        else:
            raise Exception("HTTPException")

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-o", "--output", type=str, default=None, help="Output of Job ")
    def listengine(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.listengine, line)
        status, engine_list = self.linkis_client.engines()
        if not status:
            raise Exception("Http Exception")
        if user_input.output is not None:
            self.shell.user_ns[user_input.output] = engine_list
        else:
            return engine_list

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    @argument("-i",
              "--instance",
              type=str,
              default=None,
              help="Instance of Engine ")
    def enginekill(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.enginekill, line)
        status, result = self.linkis_client.engine_kill(user_input.instance)
        if status:
            print("Success")
        else:
            raise Exception("HTTPException")

    def load_ipython_extension(ip):
        ip.register_magics(LinkisMagic)

    # Improve log display: split it into detailed logs and key information
    def log_detail(self):
        pass

    @magic_arguments()
    @line_cell_magic
    @needs_local_scope
    def flashcookies(self, line, cell="", local_ns=None):
        user_input = parse_argstring(self.flashcookies, line)
        self.linkis_client.refresh_cookies()
        self.ipython_display.display("Refresh Cookies Successful.")
Code Example #15
class SparkKernelBase(IPythonKernel):
    def __init__(self,
                 implementation,
                 implementation_version,
                 language,
                 language_version,
                 language_info,
                 session_language,
                 user_code_parser=None,
                 **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(
            self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self,
                   code,
                   silent,
                   store_history=True,
                   user_expressions=None,
                   allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history,
                                    user_expressions, allow_stdin)

        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        # self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions,
                    allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history,
                                 user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(
            register_magics_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(
            self.session_language)
        self._execute_cell(
            register_magics_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to change language to {}.".format(
                self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)

        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(
            register_auto_viz_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self,
                      code,
                      silent,
                      store_history=True,
                      user_expressions=None,
                      allow_stdin=False,
                      shutdown_if_error=False,
                      log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent,
                                                    store_history,
                                                    user_expressions,
                                                    allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(
                    log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self,
                               code,
                               silent,
                               store_history=True,
                               user_expressions=None,
                               allow_stdin=False):
        return super(SparkKernelBase,
                     self).do_execute(code, silent, store_history,
                                      user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
Code Example #16
import os
import subprocess
import re
import random
import urllib3.util
from hdijupyterutils.ipythondisplay import IpythonDisplay
import ipyvuetify as v
from google.cloud import dataproc_v1beta2
import google.auth.transport.requests
from google.auth import _cloud_sdk
from google.auth.exceptions import UserAccessTokenError
from google.oauth2.credentials import Credentials
from sparkmagic.auth.customauth import Authenticator
from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException
import googledataprocauthenticator.utils.constants as constants

ipython_display = IpythonDisplay()


def list_credentialed_user_accounts():
    """Load all of user's credentialed accounts with ``gcloud auth list`` command.

    Returns:
        Sequence[str]: each value is one of the user's credentialed accounts

    Raises:
        sparkmagic.livyclientlib.exceptions.BadUserConfigurationException: if gcloud cannot be invoked
    """
    accounts_json = ""
    if os.name == "nt":
        command = constants.CLOUD_SDK_WINDOWS_COMMAND
    else:
Code Example #17
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def _get_session_name_by_session(self, session):
        session_name = self.spark_controller.session_manager.get_session_name_by_id(
            session.id)
        # If the session name is not registered, activate the session and add it to the session list
        if not session_name:
            session_name = session.session_name
            if session_name:
                self.spark_controller.session_manager.add_session(
                    session_name, session)
                session.already_start()
                return session_name
        else:
            return session_name

        return None

    def init_livy_session(self, language="python"):
        '''
            Automatically initialize a session when executing SQL.
        :return: the session name
        '''
        return self.__get_or_create_session(language)

    def __get_or_create_session(self, language):
        proxy_user = getpass.getuser()

        self.session_language = language
        endpoint = build_endpoint(self.session_language)
        kernel_instance_id = id(self.shell.kernel)
        session_name_seleted = self.spark_controller.generate_livy_session_name(
            kernel_instance_id)

        properties = conf.get_session_properties(self.session_language)
        properties["proxyUser"] = proxy_user
        properties["session_language"] = self.session_language
        properties["session_name"] = session_name_seleted

        session_info_list = self.spark_controller.get_all_sessions_endpoint(
            endpoint)
        for session in session_info_list:
            # The session kind must match
            if session.kind != properties['kind']:
                continue

            # Distinguish pyspark from pyspark3
            if session.session_language != properties['session_language']:
                continue

            session_name = self._get_session_name_by_session(session)
            if session_name == session_name_seleted:
                if session.status in constants.HEALTHY_SESSION_STATUS:
                    return session_name_seleted
                elif session.status in constants.FINAL_STATEMENT_STATUS:
                    # FINAL, recreate new session
                    self.spark_controller.add_session(session_name_seleted,
                                                      endpoint, False,
                                                      properties)
                    return session_name_seleted
        else:
            # If Livy has no matching session, create one
            self.spark_controller.add_session(session_name_seleted, endpoint,
                                              False, properties)
            return session_name_seleted

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            if isinstance(out, string_types):
                self.ipython_display.write(out)
            elif isinstance(out, dict):
                df = convert_data_struct_to_dataframe(out)
                html = df.fillna('NULL').astype(str).to_html(notebook=True)
                self.ipython_display.html(html)
            else:
                self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
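
A side note on the control flow in __get_or_create_session: the else: attached to the for loop is Python's for/else, which runs only when the loop completes without hitting a return (or break), i.e. when no existing session matched. A tiny self-contained illustration:

def first_even(xs):
    for x in xs:
        if x % 2 == 0:
            return x
    else:
        return None  # runs only when the loop finishes without returning

assert first_even([1, 3, 4]) == 4
assert first_even([1, 3]) is None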
Code Example #18
class SparkMagicBase(Magics):

    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug(u'Initialized spark magics.')

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            output_variable_name = input_variable_name

        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               input_variable_value,
                                               output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 input_variable_value,
                                                 output_variable_name,
                                                 max_rows)
        else:
            raise BadUserDataException(
                u'Invalid or incorrect -t type. Available are: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result,
         mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, output_variable_name))

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success, out,
         mimetype) = self.spark_controller.run_command(Command(cell),
                                                       session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()

            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    self.ipython_display.html(out)
                else:
                    self.ipython_display.write(out)
            else:
                self.ipython_display.display(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):

        if "lagom as" in cell:
            self.ipython_display.send_error(
                "You are not allowed to do the following: 'import maggy.experiment.lagom as ...'. Please, just use 'import maggy.experiment as experiment' (or something else)"
            )
            raise
        elif ".lagom" in cell:
            client = Client(self.spark_controller, session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            except:
                raise
            finally:
                # 4. Kill thread before leaving current scope
                client.stop()
                try:
                    client.close()
                except:
                    if DEBUG:
                        print("Socket already closed by maggy server.")
                    pass
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
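For context on how these building blocks are typically consumed, here is a minimal hedged sketch of a cell magic delegating to execute_spark; the magic name, the subclass, and all argument values are illustrative assumptions rather than this project's actual definitions.

from IPython.core.magic import magics_class, cell_magic

@magics_class
class ExampleSparkMagics(SparkMagicBase):  # hypothetical subclass for illustration
    @cell_magic
    def spark(self, line, cell):
        # Run the cell against the default session with no sampling overrides.
        # On failure, execute_final raises SparkStatementException.
        self.execute_spark(cell,
                           output_var=None,
                           samplemethod=None,
                           maxrows=None,
                           samplefraction=None,
                           session_name=None,
                           coerce=None)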
Code example #25
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):

        if "lagom as" in cell:
            self.ipython_display.send_error(
                "Importing 'maggy.experiment.lagom' under an alias is not supported. "
                "Please use 'import maggy.experiment as experiment' (or a similar alias) instead."
            )
            # A bare `raise` here has no active exception to re-raise and would
            # itself fail; raise an explicit error instead.
            raise BadUserDataException(u'Disallowed maggy.experiment.lagom import alias.')
        elif ".lagom" in cell:
            # Use the `session_name` argument; `self.session_name` is not
            # defined on this class.
            client = Client(self.spark_controller, session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            finally:
                # Kill the heartbeat thread before leaving the current scope.
                client.stop()
                try:
                    client.close()
                except Exception:
                    if DEBUG:
                        print("Socket already closed by maggy server.")
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
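In the spirit of the MagicMock-based display tests earlier in this collection, a hedged test sketch for the lagom alias guard; the test name and mock wiring are assumptions, and it presumes the constructor's collaborators tolerate a mocked shell.

from mock import MagicMock  # unittest.mock on Python 3

def test_execute_spark_rejects_lagom_alias():
    magic = SparkMagicBase(shell=MagicMock())
    magic.ipython_display = MagicMock()
    try:
        magic.execute_spark(u'import maggy.experiment.lagom as lg',
                            None, None, None, None, u'session', None)
        assert False, 'expected BadUserDataException'
    except BadUserDataException:
        pass
    assert magic.ipython_display.send_error.call_count == 1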
Code example #26
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            # Initialize sparkmagic.magics and the Livy session when the kernel starts
            self._load_spark_magics_extension()
            self._init_livy_session()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def _is_sql_filter(self, code):
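        # When SQL restriction is enabled, intercept statements that list or
        # switch databases; do_execute answers these with a notice instead of
        # forwarding them.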
        if conf.is_sql_restrict():
            if re.search(r'\s*show\s+databases', code.lower()):
                return True

            if re.search(r'\s*use\s+', code.lower()):
                return True

        return False

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
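        # wrap_unexpected_exceptions runs f and falls back to completing the
        # cell if an unexpected exception escapes, so the kernel keeps running.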
        def f(self):
            if self._is_sql_filter(code):
                self.ipython_display.write("A dedicated database has already been selected for you; try 'show tables' directly.")
                return self._complete_cell()

            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _load_spark_magics_extension(self):
        '''
            Initialize sparkmagic.magics, equivalent to running %load_ext sparkmagic.magics.
        :return:
        '''
        register_spark_magics_code = "%load_ext sparkmagic.magics"
        self._execute_cell(register_spark_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark Magics library.")
        self.logger.debug("Loaded sparkmagic.magics")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _init_livy_session(self):
        '''
            Session initialization should not be performed directly in this class;
            it should be delegated to kernelmagics.
        :return:
        '''
        register_magics_code = "%%_do_not_call_init_livy_session -i {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to init livy session: {}.".format(self.session_language))
        self.logger.debug("Init livy session.")

    def _register_auto_viz(self):
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)

        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """Runs a cell with no effect. Call this, and return its return value,
        when an error prevents the user's cell from executing; it marks the
        cell as completed in the Jupyter UI."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()

    @gen.coroutine
    def complete_request(self, stream, ident, parent):
        content = parent['content']
        code = content['code']
        cursor_pos = content['cursor_pos']

        matches = yield gen.maybe_future(self.do_complete(code, cursor_pos))
        matches = json_clean(matches)
        self.session.send(stream, 'complete_reply',
                          matches, parent, ident)

    def _experimental_do_complete(self, code, cursor_pos):
        """
        Experimental completions from IPython, using livy completion.
        """

        code = code.strip()
        if cursor_pos is None:
            cursor_pos = len(code)

        matches = []
        with provisionalcompleter():
            session_name = self.spark_controller.generate_livy_session_name(id(self))

            endpoint = build_endpoint(self.session_language)
            session_info_list = self.spark_controller.get_all_sessions_endpoint(endpoint)
            session_id = None
            for session in session_info_list:
                if session.session_name == session_name:
                    session_id = session.id

            if session_id:
                # Only complete the cursor_line
                cursor_line, cursor_column = position_to_cursor(code, cursor_pos)
                lines = code.split("\n")
                completion_line = lines[cursor_line]
                before_lines = lines[:cursor_line]
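                # Convert the global cursor offset into an offset within the
                # cursor's own line (the -1 accounts for the newline that ends
                # the last preceding line).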
                if len(lines) > 1 and cursor_line > 0:
                    real_cursor_pos = cursor_pos - len("\n".join(before_lines)) - 1
                else:
                    real_cursor_pos = cursor_pos

                http_client = self.spark_controller._http_client(endpoint)
                kind = conf.get_livy_kind(self.session_language)
                res_completions = http_client.post_completion(session_id, kind, completion_line, real_cursor_pos)
                matches = res_completions.get("candidates", [])

        if matches:
            s = self.__get_cursor_start(code, cursor_pos, matches[0])
        else:
            s = cursor_pos

        res = {
            'matches': matches,
            'cursor_end': cursor_pos,
            'cursor_start': s,
            'metadata': {},
            'status': 'ok'
        }
        return res

    def __get_cursor_start(self, code, cursor_pos, match):
        before_code = code[:cursor_pos]
        before_code_rev = before_code[::-1]
        bucket = []
        for c in before_code_rev:
            if len(bucket) >= len(match):
                break

            if re.match(r"\w", c):
                bucket.insert(0, c)
            else:
                break

            if c == match[0]:
                bucket_len = len(bucket)
                completion_match_prefix = "".join(bucket)
                if completion_match_prefix == match[:bucket_len]:
                    return cursor_pos - bucket_len

        return cursor_pos

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        from sparkmagic.messages_api.apply_request import ApplyRequestHandler
        result_buf = []
        reply_content = ApplyRequestHandler(self).dispath_request(content)
        return reply_content, result_buf
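To make the completion anchoring concrete, here is a self-contained sketch of the prefix scan performed by __get_cursor_start; this is a simplified restatement for illustration (it omits the bucket-length cutoff), not the method itself.

import re

def cursor_start(code, cursor_pos, match):
    # Walk backwards from the cursor collecting word characters until the
    # collected text is a prefix of the proposed completion.
    bucket = []
    for c in reversed(code[:cursor_pos]):
        if not re.match(r"\w", c):
            break
        bucket.insert(0, c)
        if c == match[0] and u"".join(bucket) == match[:len(bucket)]:
            return cursor_pos - len(bucket)
    return cursor_pos

assert cursor_start(u"df = sp", 7, u"spark") == 5  # "sp" prefixes "spark": replace from column 5
assert cursor_start(u"df = xx", 7, u"spark") == 7  # no shared prefix: insert at the cursor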
Code example #27
class SparkMagicBase(Magics):

    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug(u'Initialized spark magics.')

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            output_variable_name = input_variable_name

        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               input_variable_value,
                                               output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 input_variable_value,
                                                 output_variable_name,
                                                 max_rows)
        else:
            raise BadUserDataException(
                u'Invalid -t type. Available types: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result,
         mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, output_variable_name))

    def execute_spark(self,
                      cell,
                      output_var,
                      samplemethod,
                      maxrows,
                      samplefraction,
                      session_name,
                      coerce,
                      output_handler=None):
        output_handler = output_handler or SparkOutputHandler(
            html=self.ipython_display.html,
            text=self.ipython_display.write,
            default=self.ipython_display.display)

        (success, out,
         mimetype) = self.spark_controller.run_command(Command(cell),
                                                       session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()

            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    output_handler.html(out)
                else:
                    output_handler.text(out)
            else:
                output_handler.default(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
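Because this variant routes all display calls through SparkOutputHandler, callers can capture a statement's output instead of rendering it. A hedged usage sketch: the html/text/default keyword names come from the code above; everything else is illustrative.

captured = []
handler = SparkOutputHandler(html=captured.append,
                             text=captured.append,
                             default=captured.append)
# Assuming `magics` is an instance of a concrete SparkMagicBase subclass:
# magics.execute_spark(u'1 + 1', None, None, None, None, u'session', None,
#                      output_handler=handler)
# `captured` then holds whatever the statement produced.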