Example #1
0
 def logs(self, line, cell="", local_ns=None):
     """Write the current session's logs to the display.

     ``line`` is handed to ``parse_argstring_or_throw`` and ``cell`` to
     ``_assure_cell_body_is_empty`` before anything is written. When
     ``self.session_started`` is falsy a fixed placeholder is written
     instead of calling the Spark controller.
     """
     parse_argstring_or_throw(self.logs, line)
     self._assure_cell_body_is_empty(KernelMagics.logs.__name__, cell)
     # Pick the text first so the display is written to in a single place.
     text = self.spark_controller.get_logs() if self.session_started else u"No logs yet."
     self.ipython_display.write(text)
Example #2
0
 def logs(self, line, cell="", local_ns=None):
     """Show the logs returned by the Spark controller for this session.

     The magic takes no cell body (``_assure_cell_body_is_empty`` is
     called with ``cell``). If ``self.session_started`` is falsy, a
     placeholder message is written and the controller is not queried.
     """
     parse_argstring_or_throw(self.logs, line)
     self._assure_cell_body_is_empty(KernelMagics.logs.__name__, cell)

     # Nothing to fetch until a session has been started.
     if not self.session_started:
         self.ipython_display.write("No logs yet.")
         return

     session_logs = self.spark_controller.get_logs()
     self.ipython_display.write(session_logs)
Example #3
0
 def spark(self, line, cell="", local_ns=None):
     """Run the cell body as a command through the Spark controller.

     The cell is wrapped in ``Command`` and passed to
     ``spark_controller.run_command``. Output is written to the display on
     success and reported through ``send_error`` otherwise. Nothing is run,
     and ``None`` is returned, when ``_do_not_call_start_session`` returns
     a falsy value.
     """
     parse_argstring_or_throw(self.spark, line)

     # NOTE(review): presumably this starts the session if needed and
     # reports whether one is available -- confirm against its definition.
     if not self._do_not_call_start_session(u""):
         return None

     succeeded, output = self.spark_controller.run_command(Command(cell))
     if succeeded:
         self.ipython_display.write(output)
     else:
         self.ipython_display.send_error(output)
Example #4
0
 def spark(self, line, cell="", local_ns=None):
     """Execute the cell body via ``spark_controller.run_command``.

     ``run_command`` returns a ``(success, out)`` pair; ``out`` is sent to
     ``ipython_display.write`` when ``success`` is truthy and to
     ``ipython_display.send_error`` otherwise. Returns ``None`` without
     running anything if ``_do_not_call_start_session`` is falsy.
     """
     parse_argstring_or_throw(self.spark, line)
     if not self._do_not_call_start_session(""):
         return None

     success, out = self.spark_controller.run_command(Command(cell))

     # Successful output goes to write(); failures go to send_error().
     display = self.ipython_display
     report = display.write if success else display.send_error
     report(out)
Example #5
0
    def delete(self, line, cell="", local_ns=None):
        """Delete a session on this endpoint by ID.

        Requires a session ID (``args.session``) and the force flag
        (``args.force``). An error is sent to the display, and nothing is
        deleted, when either is missing or when the requested ID equals the
        ID that ``get_session_id_for_client`` returns for
        ``self.session_name`` (the kernel's own session).
        """
        self._assure_cell_body_is_empty(KernelMagics.delete.__name__, cell)
        args = parse_argstring_or_throw(self.delete, line)
        target_session = args.session

        if target_session is None:
            self.ipython_display.send_error(
                'You must provide a session ID (-s argument).')
            return

        # Deleting is destructive, so the caller must opt in explicitly.
        if not args.force:
            self.ipython_display.send_error(
                "Include the -f parameter if you understand that all statements executed "
                "in this session will be lost.")
            return

        own_session_id = self.spark_controller.get_session_id_for_client(
            self.session_name)
        if target_session == own_session_id:
            self.ipython_display.send_error(
                "Cannot delete this kernel's session ({}). Specify a different session,"
                " shutdown the kernel to delete this session, or run %cleanup to "
                "delete all sessions for this endpoint.".format(own_session_id))
            return

        self.spark_controller.delete_session_by_id(self.endpoint, target_session)
Example #6
0
 def sql(self, line, cell="", local_ns=None):
     """Run the cell body as a SQL query and return the query result.

     Sampling, output and quiet options are read from the parsed ``line``
     arguments and forwarded to ``execute_sqlquery``. Returns ``None``
     without parsing or executing anything when
     ``_do_not_call_start_session`` returns a falsy value.
     """
     if not self._do_not_call_start_session(""):
         return

     args = parse_argstring_or_throw(self.sql, line)
     # Arguments are passed positionally; the fifth one is always None here.
     return self.execute_sqlquery(
         cell,
         args.samplemethod,
         args.maxrows,
         args.samplefraction,
         None,
         args.output,
         args.quiet,
     )
Example #7
0
 def sql(self, line, cell="", local_ns=None):
     """Execute the cell body with ``execute_sqlquery`` and return its result.

     ``line`` is parsed only after ``_do_not_call_start_session`` has
     returned a truthy value; otherwise the magic returns ``None``
     immediately.
     """
     session_ready = self._do_not_call_start_session("")
     if not session_ready:
         return

     args = parse_argstring_or_throw(self.sql, line)
     # Arguments are passed positionally; the fifth one is always None here.
     query_args = (cell, args.samplemethod, args.maxrows, args.samplefraction,
                   None, args.output, args.quiet)
     return self.execute_sqlquery(*query_args)
Example #8
0
    def info(self, line, cell="", local_ns=None):
        """Print endpoint URL, current session ID, session configs and endpoint sessions.

        Each item is written with ``ipython_display.writeln`` in that order;
        the list of sessions for ``self.endpoint`` is then handed to
        ``print_endpoint_info``. The magic takes no cell body.
        """
        parse_argstring_or_throw(self.info, line)
        self._assure_cell_body_is_empty(KernelMagics.info.__name__, cell)

        display = self.ipython_display

        endpoint_text = "Endpoint:\n\t{}\n".format(self.endpoint.url)
        display.writeln(endpoint_text)

        # Each value is fetched right before it is printed, so earlier lines
        # still appear if a later lookup fails.
        session_id = self.spark_controller.get_session_id_for_client(
            self.session_name)
        display.writeln("Current session ID number:\n\t{}\n".format(session_id))

        session_configs = conf.get_session_properties(self.language)
        display.writeln("Session configs:\n\t{}\n".format(session_configs))

        self.print_endpoint_info(
            self.spark_controller.get_all_sessions_endpoint_info(self.endpoint))
Example #9
0
    def cleanup(self, line, cell="", local_ns=None):
        """Clean up every session on the current endpoint when forced.

        With ``args.force`` set, ``_do_not_call_delete_session`` is called
        first and then ``spark_controller.cleanup_endpoint`` for
        ``self.endpoint``. Without it, only an explanatory error is sent to
        the display.
        """
        self._assure_cell_body_is_empty(KernelMagics.cleanup.__name__, cell)
        args = parse_argstring_or_throw(self.cleanup, line)

        # Refuse to do anything destructive unless explicitly forced.
        if not args.force:
            self.ipython_display.send_error(u"When you clean up the endpoint, all sessions will be lost, including the "
                                            u"one used for this notebook. Include the -f parameter if that's your "
                                            u"intention.")
            return

        self._do_not_call_delete_session(u"")
        self.spark_controller.cleanup_endpoint(self.endpoint)
Example #10
0
    def cleanup(self, line, cell="", local_ns=None):
        """Delete this notebook's session and clean up the endpoint (needs -f).

        If ``args.force`` is falsy nothing is deleted and an error describing
        the consequences is sent to the display instead.
        """
        self._assure_cell_body_is_empty(KernelMagics.cleanup.__name__, cell)
        args = parse_argstring_or_throw(self.cleanup, line)

        if args.force:
            # The kernel's own session is handled first, then the endpoint.
            self._do_not_call_delete_session("")
            self.spark_controller.cleanup_endpoint(self.endpoint)
            return

        warning = (
            "When you clean up the endpoint, all sessions will be lost, including the "
            "one used for this notebook. Include the -f parameter if that's your "
            "intention.")
        self.ipython_display.send_error(warning)
Example #11
0
    def info(self, line, cell=u"", local_ns=None):
        """Print endpoint, session, YARN application, config and URL details.

        The application ID, driver log URL and Spark UI URL are read from
        the Spark controller only when ``self.session_started`` is truthy;
        otherwise they are printed as ``None``. The sessions of
        ``self.endpoint`` are finally passed to ``print_endpoint_info``.
        """
        parse_argstring_or_throw(self.info, line)
        self._assure_cell_body_is_empty(KernelMagics.info.__name__, cell)

        if self.session_started:
            controller = self.spark_controller
            app_id = controller.get_app_id()
            driver_log_url = controller.get_driver_log_url()
            spark_ui_url = controller.get_spark_ui_url()
        else:
            # No session yet: these details are reported as None.
            app_id = driver_log_url = spark_ui_url = None

        display = self.ipython_display
        display.writeln(u"Endpoint:\n\t{}\n".format(self.endpoint.url))

        current_session_id = self.spark_controller.get_session_id_for_client(self.session_name)
        display.writeln(u"Current session ID number:\n\t{}\n".format(current_session_id))

        display.writeln(u"YARN Application ID:\n\t{}\n".format(app_id))

        session_configs = conf.get_session_properties(self.language)
        display.writeln(u"Session configs:\n\t{}\n".format(session_configs))

        display.writeln(u"Driver log:\n\t{}\n".format(driver_log_url))
        display.writeln(u"Spark UI:\n\t{}\n".format(spark_ui_url))

        self.print_endpoint_info(
            self.spark_controller.get_all_sessions_endpoint_info(self.endpoint))
Example #12
0
    def _do_not_call_change_language(self, line, cell="", local_ns=None):
        """Switch ``self.language`` to the language named in ``line``.

        The requested language is lower-cased and must be a member of
        ``LANGS_SUPPORTED``; the change is also refused once
        ``self.session_started`` is truthy. On success
        ``refresh_configuration`` is called; on refusal an error is sent to
        the display and nothing is changed.
        """
        args = parse_argstring_or_throw(self._do_not_call_change_language, line)
        requested = args.language.lower()

        if requested not in LANGS_SUPPORTED:
            problem = u"'{}' language not supported in kernel magics.".format(requested)
        elif self.session_started:
            problem = u"Cannot change the language if a session has been started."
        else:
            self.language = requested
            self.refresh_configuration()
            return

        # Both refusal paths end here with their own message.
        self.ipython_display.send_error(problem)
0
    def _do_not_call_change_language(self, line, cell="", local_ns=None):
        """Change the kernel language if it is supported and no session runs.

        ``args.language`` is lower-cased and checked against
        ``LANGS_SUPPORTED``; ``self.session_started`` is consulted only for
        a supported language. Errors are reported via ``send_error``.
        """
        args = parse_argstring_or_throw(self._do_not_call_change_language,
                                        line)
        new_language = args.language.lower()

        # Work out whether the request must be rejected, and why.
        rejection = None
        if new_language not in LANGS_SUPPORTED:
            rejection = "'{}' language not supported in kernel magics.".format(
                new_language)
        elif self.session_started:
            rejection = "Cannot change the language if a session has been started."

        if rejection is not None:
            self.ipython_display.send_error(rejection)
            return

        self.language = new_language
        self.refresh_configuration()
Example #14
0
 def configure(self, line, cell="", local_ns=None):
     """Override session settings with the JSON object given in the cell.

     The cell is decoded with ``json.loads`` before ``line`` is parsed; a
     decoding failure is reported via ``send_error``. If a session has
     already been started, ``args.force`` is required, and the session is
     deleted, the settings overridden and the session started again.
     ``self.info`` is called afterwards on every successful path.
     """
     try:
         settings = json.loads(cell)
     except ValueError:
         self.ipython_display.send_error(u"Could not parse JSON object from input '{}'".format(cell))
         return

     args = parse_argstring_or_throw(self.configure, line)

     if not self.session_started:
         # No session yet: the new settings simply take effect later.
         self._override_session_settings(settings)
     elif args.force:
         self._do_not_call_delete_session(u"")
         self._override_session_settings(settings)
         self._do_not_call_start_session(u"")
     else:
         self.ipython_display.send_error(u"A session has already been started. If you intend to recreate the "
                                         u"session with new configurations, please include the -f argument.")
         return

     self.info(u"")
Example #15
0
    def delete(self, line, cell="", local_ns=None):
        """Delete the session with the given ID on the current endpoint.

        Sends an error to the display instead of deleting when no session
        ID was given, when ``args.force`` is falsy, or when the ID equals the
        one ``get_session_id_for_client`` returns for ``self.session_name``.
        """
        self._assure_cell_body_is_empty(KernelMagics.delete.__name__, cell)
        args = parse_argstring_or_throw(self.delete, line)
        requested = args.session

        if requested is None:
            self.ipython_display.send_error(u'You must provide a session ID (-s argument).')
            return

        if not args.force:
            self.ipython_display.send_error(u"Include the -f parameter if you understand that all statements executed "
                                            u"in this session will be lost.")
            return

        # The kernel's own session must not be removed from under it.
        kernel_session_id = self.spark_controller.get_session_id_for_client(self.session_name)
        if requested == kernel_session_id:
            self.ipython_display.send_error(u"Cannot delete this kernel's session ({}). Specify a different session,"
                                            u" shutdown the kernel to delete this session, or run %cleanup to "
                                            u"delete all sessions for this endpoint.".format(kernel_session_id))
        else:
            self.spark_controller.delete_session_by_id(self.endpoint, requested)
Example #16
0
 def configure(self, line, cell="", local_ns=None):
     """Apply the JSON settings in the cell, recreating the session if forced.

     Invalid JSON is reported via ``send_error``. When a session has
     already been started and ``args.force`` is falsy, an error is sent
     and nothing changes. Otherwise the settings are overridden (between
     deleting and restarting the session, if one was started) and
     ``self.info`` is called.
     """
     try:
         overrides = json.loads(cell)
     except ValueError:
         self.ipython_display.send_error(
             "Could not parse JSON object from input '{}'".format(cell))
         return

     args = parse_argstring_or_throw(self.configure, line)

     # Read once: deleting the session below may change the attribute.
     had_session = self.session_started
     if had_session and not args.force:
         self.ipython_display.send_error(
             "A session has already been started. If you intend to recreate the "
             "session with new configurations, please include the -f argument."
         )
         return

     if had_session:
         self._do_not_call_delete_session("")
     self._override_session_settings(overrides)
     if had_session:
         self._do_not_call_start_session("")

     self.info("")
Example #17
0
    def help(self, line, cell="", local_ns=None):
        """Render an HTML table of magics (name, example, explanation).

        ``line`` is handed to ``parse_argstring_or_throw`` and ``cell`` to
        ``_assure_cell_body_is_empty``; the static table below is then passed
        unchanged to ``self.ipython_display.html``.
        """
        parse_argstring_or_throw(self.help, line)
        self._assure_cell_body_is_empty(KernelMagics.help.__name__, cell)
        # Static markup: one row per magic, no runtime substitution.
        help_html = """
<table>
  <tr>
    <th>Magic</th>
    <th>Example</th>
    <th>Explanation</th>
  </tr>
  <tr>
    <td>info</td>
    <td>%%info</td>
    <td>Outputs session information for the current Livy endpoint.</td>
  </tr>
  <tr>
    <td>cleanup</td>
    <td>%%cleanup -f</td>
    <td>Deletes all sessions for the current Livy endpoint, including this notebook's session. The force flag is mandatory.</td>
  </tr>
  <tr>
    <td>delete</td>
    <td>%%delete -f -s 0</td>
    <td>Deletes a session by number for the current Livy endpoint. Cannot delete this kernel's session.</td>
  </tr>
  <tr>
    <td>logs</td>
    <td>%%logs</td>
    <td>Outputs the current session's Livy logs.</td>
  </tr>
  <tr>
    <td>configure</td>
    <td>%%configure -f<br/>{"executorMemory": "1000M", "executorCores": 4}</td>
    <td>Configure the session creation parameters. The force flag is mandatory if a session has already been
    created and the session will be dropped and recreated.<br/>Look at <a href="https://github.com/cloudera/livy#request-body">
    Livy's POST /sessions Request Body</a> for a list of valid parameters. Parameters must be passed in as a JSON string.</td>
  </tr>
  <tr>
    <td>sql</td>
    <td>%%sql -o tables -q<br/>SHOW TABLES</td>
    <td>Executes a SQL query against the sqlContext.
    Parameters:
      <ul>
        <li>-o VAR_NAME: The result of the query will be available in the %%local Python context as a
          <a href="http://pandas.pydata.org/">Pandas</a> dataframe.</li>
        <li>-q: The magic will return None instead of the dataframe (no visualization).</li>
        <li>-m METHOD: Sample method, either <tt>take</tt> or <tt>sample</tt>.</li>
        <li>-n MAXROWS: The maximum number of rows of a SQL query that will be pulled from Livy to Jupyter.
            If this number is negative, then the number of rows will be unlimited.</li>
        <li>-r FRACTION: Fraction used for sampling.</li>
      </ul>
    </td>
  </tr>
  <tr>
    <td>local</td>
    <td>%%local<br/>a = 1</td>
    <td>All the code in subsequent lines will be executed locally. Code must be valid Python code.</td>
  </tr>
</table>
"""
        self.ipython_display.html(help_html)
Example #18
0
    def help(self, line, cell="", local_ns=None):
        """Display an HTML table listing each magic with an example and explanation.

        The magic takes no cell body (``cell`` is checked by
        ``_assure_cell_body_is_empty``). The table is a fixed unicode
        literal handed as-is to ``self.ipython_display.html``.
        """
        parse_argstring_or_throw(self.help, line)
        self._assure_cell_body_is_empty(KernelMagics.help.__name__, cell)
        # Static markup: one row per magic, no runtime substitution.
        help_html = u"""
<table>
  <tr>
    <th>Magic</th>
    <th>Example</th>
    <th>Explanation</th>
  </tr>
  <tr>
    <td>info</td>
    <td>%%info</td>
    <td>Outputs session information for the current Livy endpoint.</td>
  </tr>
  <tr>
    <td>cleanup</td>
    <td>%%cleanup -f</td>
    <td>Deletes all sessions for the current Livy endpoint, including this notebook's session. The force flag is mandatory.</td>
  </tr>
  <tr>
    <td>delete</td>
    <td>%%delete -f -s 0</td>
    <td>Deletes a session by number for the current Livy endpoint. Cannot delete this kernel's session.</td>
  </tr>
  <tr>
    <td>logs</td>
    <td>%%logs</td>
    <td>Outputs the current session's Livy logs.</td>
  </tr>
  <tr>
    <td>configure</td>
    <td>%%configure -f<br/>{"executorMemory": "1000M", "executorCores": 4}</td>
    <td>Configure the session creation parameters. The force flag is mandatory if a session has already been
    created and the session will be dropped and recreated.<br/>Look at <a href="https://github.com/cloudera/livy#request-body">
    Livy's POST /sessions Request Body</a> for a list of valid parameters. Parameters must be passed in as a JSON string.</td>
  </tr>
  <tr>
    <td>sql</td>
    <td>%%sql -o tables -q<br/>SHOW TABLES</td>
    <td>Executes a SQL query against the sqlContext.
    Parameters:
      <ul>
        <li>-o VAR_NAME: The result of the query will be available in the %%local Python context as a
          <a href="http://pandas.pydata.org/">Pandas</a> dataframe.</li>
        <li>-q: The magic will return None instead of the dataframe (no visualization).</li>
        <li>-m METHOD: Sample method, either <tt>take</tt> or <tt>sample</tt>.</li>
        <li>-n MAXROWS: The maximum number of rows of a SQL query that will be pulled from Livy to Jupyter.
            If this number is negative, then the number of rows will be unlimited.</li>
        <li>-r FRACTION: Fraction used for sampling.</li>
      </ul>
    </td>
  </tr>
  <tr>
    <td>local</td>
    <td>%%local<br/>a = 1</td>
    <td>All the code in subsequent lines will be executed locally. Code must be valid Python code.</td>
  </tr>
</table>
"""
        self.ipython_display.html(help_html)
Example #19
0
    def spark(self, line, cell="", local_ns=None):
        """Magic to execute spark remotely.

           This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
           be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
           When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

           If invoked with no subcommand, the cell will be executed against the specified session.

           Subcommands
           -----------
           info
               Display the available Livy sessions and other configurations for sessions.
           add
               Add a Livy session given a session name (-s), language (-l), and endpoint credentials.
               The -k argument, if present, will skip adding this session if it already exists.
               e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -a u -p -k`
           config
               Override the livy session properties sent to Livy on session creation. All session creations will
               contain these config settings from then on.
               Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
               endpoint in Livy.
               e.g. `%%spark config`
                    `{"driverMemory":"1000M", "executorCores":4}`
           run
               Run Spark code against a session.
               e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
               e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
               e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
                        previously created and store the pandas dataframe created in the my_var variable in the
                        Python environment.
           logs
               Returns the logs for a given session.
               e.g. `%spark logs -s testsession` will return the logs for the testsession previously created
           delete
               Delete a Livy session.
               e.g. `%spark delete -s defaultlivy`
           cleanup
               Delete all Livy sessions created by the notebook. No arguments required.
               e.g. `%spark cleanup`
        """
        usage = "Please look at usage of %spark by executing `%spark?`."
        user_input = line
        args = parse_argstring_or_throw(self.spark, user_input)

        # The first positional token selects the subcommand; an empty string
        # means "run the cell against a session".
        subcommand = args.command[0].lower()

        # info
        if subcommand == "info":
            if args.url is not None:
                endpoint = Endpoint(args.url, args.user, args.password)
                info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
                self.print_endpoint_info(info_sessions)
            else:
                self._print_local_info()
        # config
        elif subcommand == "config":
            # Report malformed JSON through the display instead of letting
            # json.loads' ValueError escape as a raw traceback.
            try:
                session_config = json.loads(cell)
            except ValueError:
                self.ipython_display.send_error("Could not parse JSON object from input '{}'".format(cell))
                return
            conf.override(conf.session_configs.__name__, session_config)
        # add
        elif subcommand == "add":
            if args.url is None:
                self.ipython_display.send_error("Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
                return

            name = args.session
            language = args.language
            endpoint = Endpoint(args.url, args.user, args.password)
            skip = args.skip

            properties = conf.get_session_properties(language)

            self.spark_controller.add_session(name, endpoint, skip, properties)
        # delete
        elif subcommand == "delete":
            # A session name takes precedence over a URL + session ID pair.
            if args.session is not None:
                self.spark_controller.delete_session_by_name(args.session)
            elif args.url is not None:
                if args.id is None:
                    self.ipython_display.send_error("Must provide --id or -i option to delete session at endpoint from URL")
                    return
                endpoint = Endpoint(args.url, args.user, args.password)
                session_id = args.id
                self.spark_controller.delete_session_by_id(endpoint, session_id)
            else:
                self.ipython_display.send_error("Subcommand 'delete' requires a session name or a URL and session ID")
        # cleanup
        elif subcommand == "cleanup":
            if args.url is not None:
                endpoint = Endpoint(args.url, args.user, args.password)
                self.spark_controller.cleanup_endpoint(endpoint)
            else:
                self.spark_controller.cleanup()
        # logs
        elif subcommand == "logs":
            if args.session is None:
                self.ipython_display.send_error("Need to provide session argument (-s SESSION_NAME)")
                return
            self.ipython_display.write(self.spark_controller.get_logs(args.session))
        # run
        elif not subcommand:
            if args.session is None:
                self.ipython_display.send_error("Need to provide session argument (-s SESSION_NAME)")
                return
            if args.context == CONTEXT_NAME_SPARK:
                (success, out) = self.spark_controller.run_command(Command(cell), args.session)
                if success:
                    self.ipython_display.write(out)
                else:
                    self.ipython_display.send_error(out)
            elif args.context == CONTEXT_NAME_SQL:
                # Only the SQL context returns a value (the query result).
                return self.execute_sqlquery(cell, args.samplemethod, args.maxrows, args.samplefraction,
                                             args.session, args.output, args.quiet)
            else:
                self.ipython_display.send_error("Context '{}' not found".format(args.context))
        # error
        else:
            self.ipython_display.send_error("Subcommand '{}' not found. {}".format(subcommand, usage))