def __init__(self, output_var, samplemethod=None, maxrows=None, samplefraction=None, spark_events=None, coerce=None):
    """Set up a store command, filling unset sampling options from ``conf``.

    Args:
        output_var: Stored on the instance as ``output_var``.
        samplemethod: ``u'take'`` or ``u'sample'``; ``None`` falls back to
            ``conf.default_samplemethod()``.
        maxrows: Integer row limit; ``None`` falls back to
            ``conf.default_maxrows()``.
        samplefraction: Number within [0.0, 1.0]; ``None`` falls back to
            ``conf.default_samplefraction()``.
        spark_events: Event emitter; a fresh ``SparkEvents()`` is stored when
            ``None``. The parent initializer receives the value as passed.
        coerce: Stored on the instance as ``_coerce``.

    Raises:
        BadUserDataException: If any sampling option fails validation.
    """
    # The parent is handed the caller's spark_events untouched (possibly None).
    super(SparkStoreCommand, self).__init__("", spark_events)

    samplemethod = conf.default_samplemethod() if samplemethod is None else samplemethod
    maxrows = conf.default_maxrows() if maxrows is None else maxrows
    samplefraction = conf.default_samplefraction() if samplefraction is None else samplefraction

    allowed_methods = {u'take', u'sample'}
    if samplemethod not in allowed_methods:
        raise BadUserDataException(u'samplemethod (-m) must be one of (take, sample)')

    if not isinstance(maxrows, int):
        raise BadUserDataException(u'maxrows (-n) must be an integer')

    # Keep the chained comparison: it also rejects NaN, which a pair of
    # "< 0.0 or > 1.0" tests would let through.
    fraction_in_range = 0.0 <= samplefraction <= 1.0
    if not fraction_in_range:
        raise BadUserDataException(u'samplefraction (-r) must be a float between 0.0 and 1.0')

    self.samplemethod = samplemethod
    self.maxrows = maxrows
    self.samplefraction = samplefraction
    self.output_var = output_var
    self._spark_events = SparkEvents() if spark_events is None else spark_events
    self._coerce = coerce
def do_send_to_spark(self, cell, input_variable_name, var_type, output_variable_name, max_rows, session_name):
    """Send a variable from the local user namespace to Spark.

    Args:
        cell: Not used by this method.
        input_variable_name: Key looked up in ``self.shell.user_ns``.
        var_type: Type selector, compared case-insensitively against
            ``self._STRING_VAR_TYPE`` and ``self._PANDAS_DATAFRAME_VAR_TYPE``.
        output_variable_name: Name to use on the Spark side; a falsy value
            reuses ``input_variable_name``.
        max_rows: Row limit passed to the DataFrame command; a falsy value
            falls back to ``conf.default_maxrows()``.
        session_name: Not used by this method.

    Raises:
        BadUserDataException: If the variable is missing, is ``None``, or
            ``var_type`` is not one of the supported types.
    """
    try:
        local_value = self.shell.user_ns[input_variable_name]
    except KeyError:
        raise BadUserDataException(u'Variable named {} not found.'.format(input_variable_name))

    if local_value is None:
        raise BadUserDataException(u'Value of {} is None!'.format(input_variable_name))

    output_variable_name = output_variable_name or input_variable_name
    max_rows = max_rows or conf.default_maxrows()

    requested_type = var_type.lower()
    if requested_type == self._STRING_VAR_TYPE:
        command = SendStringToSparkCommand(input_variable_name, local_value, output_variable_name)
    elif requested_type == self._PANDAS_DATAFRAME_VAR_TYPE:
        command = SendPandasDfToSparkCommand(input_variable_name, local_value, output_variable_name, max_rows)
    else:
        allowed_types = u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)
        raise BadUserDataException(u'Invalid or incorrect -t type. Available are: [{}]'.format(allowed_types))

    # run_command yields a 3-tuple; the mime type is not needed here.
    success, result, _mime_type = self.spark_controller.run_command(command, None)
    if success:
        confirmation = u"Successfully passed '{}' as '{}' to Spark kernel".format(
            input_variable_name, output_variable_name)
        self.ipython_display.write(confirmation)
    else:
        self.ipython_display.send_error(result)
def _assert_input_is_string_type(self, input_variable_name, input_variable_value):
    """Raise ``BadUserDataException`` unless the value is a ``str``.

    Only ``str`` instances pass the check, even though the error text
    mentions bytes.

    Args:
        input_variable_name: Variable name reported in the error message.
        input_variable_value: Value whose type is checked.
    """
    if isinstance(input_variable_value, str):
        return

    actual_type_name = input_variable_value.__class__.__name__
    message = u'{} is not a str or bytes! Got {} instead'.format(input_variable_name, actual_type_name)
    raise BadUserDataException(message)
def _assert_input_is_pandas_dataframe(self, input_variable_name, input_variable_value):
    """Raise ``BadUserDataException`` unless the value is a ``pd.DataFrame``.

    Args:
        input_variable_name: Variable name reported in the error message.
        input_variable_value: Value whose type is checked.
    """
    if isinstance(input_variable_value, pd.DataFrame):
        return

    actual_type_name = input_variable_value.__class__.__name__
    message = u'{} is not a Pandas DataFrame! Got {} instead.'.format(input_variable_name, actual_type_name)
    raise BadUserDataException(message)
def test_help_with_cell_content():
    """%%help with a non-empty cell sends one error, renders no HTML, and emits a failure event."""
    cell_body = "HAHAH"
    expected_message = "Cell body for %%help magic must be empty; got 'HAHAH' instead"

    magic.help("", cell=cell_body)

    assert_equals(ipython_display.send_error.call_count, 1)
    assert_equals(ipython_display.html.call_count, 0)
    _assert_magic_failure_event_emitted_once('help', BadUserDataException(expected_message))
def test_configure_cant_parse_object_as_json():
    """%%configure with a non-JSON cell sends exactly one error.

    The test expects the 'configure' magic event to be recorded as
    successful, per ``_assert_magic_successful_event_emitted_once``.
    """
    magic.info = MagicMock()
    magic._override_session_settings = MagicMock(side_effect=BadUserDataException('help'))
    unparseable_cell = "I CAN'T PARSE THIS AS JSON"

    magic.configure('', unparseable_cell)

    _assert_magic_successful_event_emitted_once('configure')
    assert_equals(ipython_display.send_error.call_count, 1)
def test_configure_expected_exception():
    """A BadUserDataException from ``_override_session_settings`` is reported as an expected error."""
    failure = BadUserDataException('help')
    magic.info = MagicMock()
    magic._override_session_settings = MagicMock(side_effect=failure)

    magic.configure('', '{"extra": "yes"}')

    _assert_magic_failure_event_emitted_once('configure', failure)
    expected_text = constants.EXPECTED_ERROR_MSG.format(failure)
    ipython_display.send_error.assert_called_once_with(expected_text)
def test_logs_with_cell_content():
    """%%logs with a non-empty cell sends one error and emits a failure event."""
    line = ""
    msg = "Cell body for %%logs magic must be empty; got 'BOOP' instead"

    magic.logs(line, cell="BOOP")

    assert_equals(ipython_display.send_error.call_count, 1)
    _assert_magic_failure_event_emitted_once('logs', BadUserDataException(msg))
def parse_argstring_or_throw(magic_func, argstring, parse_argstring=parse_argstring):
    """Parse a magic's argument string, reporting usage errors as bad user data.

    Alternative to ``parse_argstring`` from ``IPython.core.magic_arguments``.

    Args:
        magic_func: The magic function whose declared arguments are parsed.
        argstring: The raw argument string.
        parse_argstring: Parser callable to delegate to; overridable by callers.

    Returns:
        Whatever ``parse_argstring(magic_func, argstring)`` returns.

    Raises:
        BadUserDataException: If the parser raises ``UsageError``; the message
            is ``str()`` of the original error.
    """
    try:
        parsed_args = parse_argstring(magic_func, argstring)
    except UsageError as usage_error:
        raise BadUserDataException(str(usage_error))
    return parsed_args
def test_spark_statement_exception():
    """A failed statement raises SparkStatementException and does not call cleanup."""
    store_command = MagicMock()
    magic._spark_store_command = MagicMock(return_value=store_command)

    failed_statement = (False, 'out', "text/plain")
    follow_up_error = BadUserDataException("Ka-boom!")
    magic.spark_controller.run_command.side_effect = [failed_statement, follow_up_error]

    assert_raises(SparkStatementException, magic.execute_spark,
                  "", None, None, None, None, session, True)
    magic.spark_controller.cleanup.assert_not_called()
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Replace ``self.endpoint`` using the URL and auth parsed from ``line``.

    Args:
        line: Argument string for this magic.
        cell: Not used by this method.
        local_ns: Not used by this method.

    Raises:
        BadUserDataException: If a session has already been started. Argument
            parsing happens first, so parse errors take precedence.
    """
    parsed = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)

    if self.session_started:
        raise BadUserDataException(u"Cannot change the endpoint if a session has been started.")

    self.endpoint = Endpoint(parsed.url, initialize_auth(args=parsed))
def to_command(self, kind, spark_context_variable_name):
    """Build the command for the given session kind.

    Args:
        kind: Session kind, compared against the ``constants.SESSION_KIND_*`` values.
        spark_context_variable_name: Passed through to the kind-specific builder.

    Returns:
        The result of ``_pyspark_command``, ``_scala_command`` or ``_r_command``.

    Raises:
        BadUserDataException: If ``kind`` matches none of the supported kinds.
    """
    if kind == constants.SESSION_KIND_PYSPARK:
        return self._pyspark_command(spark_context_variable_name)
    if kind == constants.SESSION_KIND_SPARK:
        return self._scala_command(spark_context_variable_name)
    if kind == constants.SESSION_KIND_SPARKR:
        return self._r_command(spark_context_variable_name)

    raise BadUserDataException(u"Kind '{}' is not supported.".format(kind))
def test_cleanup_with_cell_content():
    """%%cleanup with a non-empty cell sends one error and emits a failure event."""
    magic.session_started = True
    spark_controller.cleanup_endpoint = MagicMock()
    spark_controller.delete_session_by_name = MagicMock()
    expected_message = "Cell body for %%cleanup magic must be empty; got 'HEHEHE' instead"

    magic.cleanup("-f", "HEHEHE")

    assert_equals(ipython_display.send_error.call_count, 1)
    _assert_magic_failure_event_emitted_once('cleanup', BadUserDataException(expected_message))
def test_delete_session_expected_exception():
    """A BadUserDataException while deleting the session is shown as an expected error.

    Also checks that ``session_started`` ends up falsy and that the delete
    was attempted exactly once with the magic's session name.
    """
    failure = BadUserDataException('hey')
    magic.session_started = True
    spark_controller.delete_session_by_name.side_effect = failure

    magic._do_not_call_delete_session("")

    assert not magic.session_started
    spark_controller.delete_session_by_name.assert_called_once_with(magic.session_name)
    expected_text = constants.EXPECTED_ERROR_MSG.format(failure)
    ipython_display.send_error.assert_called_once_with(expected_text)
def execute(self, session):
    """Run this command on ``session`` and convert the records to a dataframe.

    Args:
        session: Object exposing ``kind``; it is also passed to the generated
            command's ``execute``.

    Returns:
        The result of ``records_to_dataframe(records_text, session.kind, self._coerce)``.

    Raises:
        BadUserDataException: If the generated command reports failure; the
            message is the text the command returned.
    """
    # Any exception simply propagates to the caller; no handler is needed here.
    command = self.to_command(session.kind, self.output_var)
    success, records_text = command.execute(session)
    if not success:
        raise BadUserDataException(records_text)
    return records_to_dataframe(records_text, session.kind, self._coerce)
def send_to_spark(self, line, cell=u"", local_ns=None):
    """Magic entry point that sends a local variable to Spark.

    Args:
        line: Argument string; must supply ``-i``.
        cell: Cell body; must be empty.
        local_ns: Not used by this method.

    Raises:
        BadUserDataException: If the cell body is not empty, the arguments
            cannot be parsed, or ``-i`` is missing.
    """
    self._assure_cell_body_is_empty(KernelMagics.send_to_spark.__name__, cell)
    parsed = parse_argstring_or_throw(self.send_to_spark, line)

    if not parsed.input:
        raise BadUserDataException("-i param not provided.")

    # Nothing is sent when the start-session call returns a falsy value.
    if not self._do_not_call_start_session(""):
        return

    self.do_send_to_spark(cell, parsed.input, parsed.vartype, parsed.varname, parsed.maxrows, None)
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Replace ``self.endpoint`` using the server and credentials parsed from ``line``.

    Args:
        line: Argument string for this magic.
        cell: Not used by this method.
        local_ns: Not used by this method.

    Raises:
        BadUserDataException: If a session has already been started. Argument
            parsing happens first, so parse errors take precedence.
    """
    parsed = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    username, password, server = parsed.username, parsed.password, parsed.server

    if self.session_started:
        raise BadUserDataException(u"Cannot change the endpoint if a session has been started.")

    self.endpoint = Endpoint(server, username, password)
def test_spark_statement_exception_shutdowns_livy_session():
    """With ``shutdown_session_on_spark_statement_errors`` on, a failed statement calls cleanup once."""
    overrides = {"shutdown_session_on_spark_statement_errors": True}
    conf.override_all(overrides)

    store_command = MagicMock()
    magic._spark_store_command = MagicMock(return_value=store_command)

    failed_statement = (False, 'out', "text/plain")
    follow_up_error = BadUserDataException("Ka-boom!")
    magic.spark_controller.run_command.side_effect = [failed_statement, follow_up_error]

    assert_raises(SparkStatementException, magic.execute_spark,
                  "", None, None, None, None, session, True)
    magic.spark_controller.cleanup.assert_called_once()
def test_spark_exception_with_output_var():
    """An error from the second ``run_command`` call propagates and nothing is stored.

    The first call succeeds and its output is written. The second call
    raises, so ``execute_spark`` raises and the user namespace stays empty.
    """
    mockSparkCommand = MagicMock()
    magic._spark_store_command = MagicMock(return_value=mockSparkCommand)
    exception = BadUserDataException("Ka-boom!")
    output_var = "var_name"
    magic.spark_controller.run_command.side_effect = [(True, 'out'), exception]

    assert_raises(BadUserDataException, magic.execute_spark,
                  "", output_var, None, None, None, session)

    magic.ipython_display.write.assert_called_once_with('out')
    magic._spark_store_command.assert_called_once_with(output_var, None, None, None)
    assert shell.user_ns == {}
def test_delete_with_cell_content():
    """%%delete with a non-empty cell emits a failure event and sends one error."""
    # This happens when session has not been created
    target_session_id = 0
    delete_args = "-f -s {}".format(target_session_id)
    expected_message = "Cell body for %%delete magic must be empty; got '~~~' instead"
    spark_controller.delete_session_by_id = MagicMock()
    spark_controller.get_session_id_for_client = MagicMock(return_value=None)

    magic.delete(delete_args, "~~~")

    _assert_magic_failure_event_emitted_once('delete', BadUserDataException(expected_message))
    assert_equals(ipython_display.send_error.call_count, 1)
def test_info_with_cell_content():
    """%%info with a non-empty cell prints no endpoint info, sends one error, and emits a failure event."""
    print_info_mock = MagicMock()
    magic._print_endpoint_info = print_info_mock
    endpoint_sessions = ["1", "2"]
    spark_controller.get_all_sessions_endpoint_info = MagicMock(return_value=endpoint_sessions)
    expected_message = "Cell body for %%info magic must be empty; got 'howdy' instead"

    magic.info("", cell='howdy')

    print_info_mock.assert_not_called()
    assert_equals(ipython_display.send_error.call_count, 1)
    spark_controller.get_session_id_for_client.assert_not_called()
    _assert_magic_failure_event_emitted_once('info', BadUserDataException(expected_message))
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Replace ``self.endpoint`` using server, auth and Kerberos options parsed from ``line``.

    Args:
        line: Argument string for this magic.
        cell: Not used by this method.
        local_ns: Not used by this method.

    Raises:
        BadUserDataException: If a session has already been started. Argument
            parsing happens first, so parse errors take precedence.
    """
    parsed = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    username, password, server, auth = parsed.username, parsed.password, parsed.server, parsed.auth
    mutual_auth, host_override = parsed.krb_mutual_auth, parsed.krb_host_override

    if self.session_started:
        raise BadUserDataException(u"Cannot change the endpoint if a session has been started.")

    self.endpoint = Endpoint(
        server,
        auth,
        username,
        password,
        krb_mutual_auth=mutual_auth,
        krb_host_override=host_override,
    )
def __init__(self, parsed_attributes=None):
    """Initialize the authenticator from parsed magic arguments, or with placeholders.

    Args:
        parsed_attributes: Namespace produced by parsing a %spark magic
            command, exposing ``user`` and ``password``. When ``None``,
            placeholder credential strings are stored instead.

    Raises:
        BadUserDataException: If ``parsed_attributes`` is given and its
            ``user`` or ``password`` is the empty string.
    """
    if parsed_attributes is None:
        self.username = '******'
        self.password = '******'
    else:
        if parsed_attributes.user == '' or parsed_attributes.password == '':
            raise BadUserDataException(
                "Need to supply username and password arguments for "
                "Basic Access Authentication. (e.g. -a username -p password).")
        self.username = parsed_attributes.user
        self.password = parsed_attributes.password

    # Both base initializers are invoked explicitly, HTTPBasicAuth first.
    HTTPBasicAuth.__init__(self, self.username, self.password)
    Authenticator.__init__(self, parsed_attributes)
def _assure_cell_body_is_empty(magic_name, cell): if cell.strip(): raise BadUserDataException( u"Cell body for %%{} magic must be empty; got '{}' instead". format(magic_name, cell.strip()))