def test_configure():
    """Verify %configure: stores configs pre-session, rejects changes post-session unless -f recreates it."""
    # Stub the info display so we can assert it was invoked.
    magic.info = MagicMock()

    # No session yet: configs are simply stored and info is shown.
    conf.override_all({})
    magic.configure('{"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}
    magic.info.assert_called_once_with("")

    # Session already running, no -f: the change is refused with an error.
    magic.session_started = True
    conf.override_all({})
    magic.configure('{"extra": "yes"}')
    assert conf.session_configs() == {}
    assert_equals(ipython_display.send_error.call_count, 1)

    # Session running, -f given: session is deleted and recreated with the new configs.
    magic.info.reset_mock()
    conf.override_all({})
    magic.configure('-f {"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}
    spark_controller.delete_session_by_name.assert_called_once_with(magic.session_name)
    expected_properties = {"kind": constants.SESSION_KIND_PYSPARK, "extra": "yes"}
    spark_controller.add_session.assert_called_once_with(
        magic.session_name, magic.connection_string, False, expected_properties)
    magic.info.assert_called_once_with("")
def test_configure():
    """Verify %configure: stores configs pre-session, rejects changes post-session unless -f recreates it."""
    # Stub the info display so we can assert it was invoked.
    magic.info = MagicMock()

    # No session yet: configs are simply stored and info is shown.
    conf.override_all({})
    magic.configure('{"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}
    magic.info.assert_called_once_with("")

    # Session already running, no -f: the change is refused with an error.
    magic.session_started = True
    conf.override_all({})
    magic.configure('{"extra": "yes"}')
    assert conf.session_configs() == {}
    assert_equals(ipython_display.send_error.call_count, 1)

    # Session running, -f given: session is deleted and recreated with the new configs.
    magic.info.reset_mock()
    conf.override_all({})
    magic.configure('-f {"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}
    spark_controller.delete_session_by_name.assert_called_once_with(magic.session_name)
    expected_properties = {"kind": constants.SESSION_KIND_PYSPARK, "extra": "yes"}
    spark_controller.add_session.assert_called_once_with(
        magic.session_name, magic.endpoint, False, expected_properties)
    magic.info.assert_called_once_with("")
def __init__(self, spark_controller, ipywidget_factory, ipython_display, endpoints_dropdown_widget, refresh_method):
    # Nested widget: shared setup (controller, factory, display) is delegated to the base class.
    super(CreateSessionWidget, self).__init__(spark_controller, ipywidget_factory, ipython_display, True)

    self.refresh_method = refresh_method
    self.endpoints_dropdown_widget = endpoints_dropdown_widget

    # Build the form controls for creating a session.
    factory = self.ipywidget_factory
    self.session_widget = factory.get_text(description='Name:', value='session-name')
    self.lang_widget = factory.get_toggle_buttons(description='Language:', options=[LANG_SCALA, LANG_PYTHON])
    self.properties = factory.get_text(description='Properties:', value=json.dumps(conf.session_configs()))
    self.submit_widget = factory.get_submit_button(description='Create Session')

    self.children = [
        factory.get_html(value="<br/>", width="600px"),
        self.endpoints_dropdown_widget,
        self.session_widget,
        self.lang_widget,
        self.properties,
        factory.get_html(value="<br/>", width="600px"),
        self.submit_widget,
    ]

    # Give every child a back-reference to this widget.
    for child in self.children:
        child.parent_widget = self
def _print_local_info(self):
    """Print the locally managed Livy sessions and the current session configs to stdout."""
    # One formatted entry per session string reported by the local session manager.
    # NOTE(review): the exact leading whitespace inside these literals was lost when the
    # source was collapsed onto one line — confirm the indentation against the original layout.
    sessions_info = ["        {}".format(i) for i in self.spark_controller.get_manager_sessions_str()]
    print("""Info for running Spark:
    Sessions:
{}
    Session configs:
        {}
""".format("\n".join(sessions_info), conf.session_configs()))
def test_add_sessions_command_extra_properties():
    """Configs set via 'config' must be merged into the properties of a later 'add'."""
    conf.override_all({})
    magic.spark('config {"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}

    # Capture the add_session call made by the magic.
    mock_add = MagicMock()
    spark_controller.add_session = mock_add
    session_name = "name"
    conn = "url=http://location:port;username=name;password=word"
    line = " ".join(["add", session_name, "scala", conn])

    magic.spark(line)

    # The 'scala' language maps to the "spark" kind; "extra" comes from the config override.
    mock_add.assert_called_once_with(session_name, conn, False, {"kind": "spark", "extra": "yes"})
    conf.load()
def test_add_sessions_command_extra_properties():
    """Configs set via 'config' must be merged into the properties of a later 'add' (flag-style args)."""
    conf.override_all({})
    magic.spark('config {"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}

    # Capture the add_session call made by the magic.
    mock_add = MagicMock()
    spark_controller.add_session = mock_add
    line = " ".join(["add", "-s name", "-l scala", "-u http://livyendpoint.com"])

    magic.spark(line)

    # The 'scala' language maps to the "spark" kind; "extra" comes from the config override.
    mock_add.assert_called_once_with("name", Endpoint("http://livyendpoint.com"), False,
                                     {"kind": "spark", "extra": "yes"})
    conf.load()
def test_add_sessions_command_extra_properties():
    """A prior 'config' call should contribute extra properties to a subsequent 'add'."""
    conf.override_all({})
    magic.spark('config {"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}

    add_mock = MagicMock()
    spark_controller.add_session = add_mock

    session = "name"
    url = "url=http://location:port;username=name;password=word"
    magic.spark("add {} scala {}".format(session, url))

    # Language 'scala' becomes the "spark" kind; the override supplies "extra".
    expected = {"kind": "spark", "extra": "yes"}
    add_mock.assert_called_once_with(session, url, False, expected)
    conf.load()
def test_add_sessions_command_extra_properties():
    """Extra properties from 'config' flow into session creation via 'add' (two-argument spark call)."""
    conf.override_all({})
    magic.spark("config", '{"extra": "yes"}')
    assert conf.session_configs() == {"extra": "yes"}

    add_mock = MagicMock()
    spark_controller.add_session = add_mock

    magic.spark("add -s name -l scala -u http://livyendpoint.com")

    # Language 'scala' becomes the "spark" kind; the override supplies "extra".
    add_mock.assert_called_once_with("name", Endpoint("http://livyendpoint.com"), False,
                                     {"kind": "spark", "extra": "yes"})
    conf.load()
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

       This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
       be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
       When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

       If invoked with no subcommand, the cell will be executed against the specified session.

       Subcommands
       -----------
       info
           Display the available Livy sessions and other configurations for sessions.
       add
           Add a Livy session. First argument is the name of the session, second argument
           is the language, and third argument is the connection string of the Livy endpoint.
           A fourth argument specifying if session creation can be skipped if it already exists is
           optional: "skip" or empty.
           e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p skip`
           or
           e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p`
       config
           Override the livy session properties sent to Livy on session creation. All session creations will
           contain these config settings from then on.
           Expected value is a JSON key-value string to be sent as part of the Request Body for the
           POST /sessions endpoint in Livy.
           e.g. `%%spark config {"driverMemory":"1000M", "executorCores":4}`
       run
           Run Spark code against a session.
           e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
           e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously
           created
           e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
           previously created and store the pandas dataframe created in the my_var variable in the
           Python environment.
       logs
           Returns the logs for a given session.
           e.g. `%%spark logs -s testsession` will return the logs for the testsession previously created
       delete
           Delete a Livy session. Argument is the name of the session to be deleted.
           e.g. `%%spark delete defaultlivy`
       cleanup
           Delete all Livy sessions created by the notebook. No arguments required.
           e.g. `%%spark cleanup`
    """
    usage = "Please look at usage of %spark by executing `%spark?`."
    user_input = line
    args = parse_argstring(self.spark, user_input)

    # First positional token selects the subcommand; empty string means "run the cell".
    subcommand = args.command[0].lower()

    try:
        # info: one extra token = remote endpoint to query; none = local session manager.
        if subcommand == "info":
            if len(args.command) == 2:
                connection_string = args.command[1]
                info_sessions = self.spark_controller.get_all_sessions_endpoint_info(connection_string)
                self._print_endpoint_info(info_sessions)
            elif len(args.command) == 1:
                self._print_local_info()
            else:
                raise ValueError("Subcommand 'info' requires no value or a connection string to show all sessions.\n"
                                 "{}".format(usage))
        # config
        elif subcommand == "config":
            # Would normally do " ".join(args.command[1:]) but parse_argstring removes quotes...
            # Slices off the leading "config " (7 chars) so the raw JSON text survives intact.
            rest_of_line = user_input[7:]
            conf.override(conf.session_configs.__name__, json.loads(rest_of_line))
        # add: name, language, connection string, and an optional "skip" flag.
        elif subcommand == "add":
            if len(args.command) != 4 and len(args.command) != 5:
                raise ValueError("Subcommand 'add' requires three or four arguments.\n{}".format(usage))
            name = args.command[1].lower()
            language = args.command[2].lower()
            connection_string = args.command[3]
            if len(args.command) == 5:
                # "skip" allows reusing an existing session of the same name.
                skip = args.command[4].lower() == "skip"
            else:
                skip = False
            # Deep-copy so mutating "kind" below cannot leak into the shared config store.
            properties = copy.deepcopy(conf.session_configs())
            properties["kind"] = self._get_livy_kind(language)
            self.spark_controller.add_session(name, connection_string, skip, properties)
        # delete: by session name (1 arg) or by endpoint + session id (2 args).
        elif subcommand == "delete":
            if len(args.command) == 2:
                name = args.command[1].lower()
                self.spark_controller.delete_session_by_name(name)
            elif len(args.command) == 3:
                connection_string = args.command[1]
                session_id = args.command[2]
                self.spark_controller.delete_session_by_id(connection_string, session_id)
            else:
                raise ValueError("Subcommand 'delete' requires a session name or a connection string and id.\n{}"
                                 .format(usage))
        # cleanup: tear down all sessions, either at a given endpoint or locally.
        elif subcommand == "cleanup":
            if len(args.command) == 2:
                connection_string = args.command[1]
                self.spark_controller.cleanup_endpoint(connection_string)
            elif len(args.command) == 1:
                self.spark_controller.cleanup()
            else:
                raise ValueError("Subcommand 'cleanup' requires no further values or a connection string to clean up "
                                 "sessions.\n{}".format(usage))
        # logs: fetch logs for the session named by the -s option.
        elif subcommand == "logs":
            if len(args.command) == 1:
                (success, out) = self.spark_controller.get_logs(args.session)
                if success:
                    self.ipython_display.write(out)
                else:
                    self.ipython_display.send_error(out)
            else:
                raise ValueError("Subcommand 'logs' requires no further values.\n{}".format(usage))
        # run: no subcommand given — execute the cell against the chosen context.
        elif len(subcommand) == 0:
            if args.context == Constants.context_name_spark:
                (success, out) = self.spark_controller.run_cell(cell, args.session)
                if success:
                    self.ipython_display.write(out)
                else:
                    self.ipython_display.send_error(out)
            elif args.context == Constants.context_name_sql:
                # SQL and Hive contexts return a pandas dataframe (optionally bound to -o var).
                return self._execute_against_context_that_returns_df(self.spark_controller.run_cell_sql,
                                                                     cell, args.session, args.output)
            elif args.context == Constants.context_name_hive:
                return self._execute_against_context_that_returns_df(self.spark_controller.run_cell_hive,
                                                                     cell, args.session, args.output)
            else:
                raise ValueError("Context '{}' not found".format(args.context))
        # error: unknown subcommand.
        else:
            raise ValueError("Subcommand '{}' not found. {}".format(subcommand, usage))
    except ValueError as err:
        # All user-facing argument errors funnel here and are shown instead of raised.
        self.ipython_display.send_error("{}".format(err))