def _setup():
    """Rebuild the module-level fixtures used by the tests.

    Rebinds the globals ``spark_events``, ``time_stamp`` and ``guid``.
    """
    global spark_events, guid, time_stamp

    spark_events = SparkEvents()
    # Replace the handler on this instance with a mock.
    spark_events.handler = MagicMock()

    # NOTE(review): this assignment targets the SparkEvents *class*, not the
    # instance created above, and nothing in this function restores the
    # original attribute afterwards -- confirm that is intended.
    SparkEvents.get_utc_date_time = MagicMock()

    # The value captured here is whatever the mock installed above returns.
    time_stamp = spark_events.get_utc_date_time()

    guid = utils.generate_uuid()
def _setup():
    """Rebuild the module-level fixtures used by the tests.

    Rebinds the globals ``spark_events``, ``time_stamp`` and ``guid1`` ..
    ``guid3``. The mocks are installed on the ``SparkEvents`` instance.
    """
    global spark_events, guid1, guid2, guid3, time_stamp

    spark_events = SparkEvents()

    # Each of these instance attributes is replaced by its own MagicMock,
    # in this order.
    for attribute in ("handler", "_get_utc_date_time", "_verify_language_ok"):
        setattr(spark_events, attribute, MagicMock())

    # The value captured here is whatever the mocked clock returns.
    time_stamp = spark_events._get_utc_date_time()

    # Three separate generate_uuid() calls, evaluated left to right.
    guid1, guid2, guid3 = (utils.generate_uuid() for _ in range(3))
def __init__(self, shell, data=None, spark_events=None):
    """Set up logging, display and the Spark controller, then emit the
    library-loaded event.

    Parameters:
        shell : forwarded to the parent constructor.
        data : accepted but not used by this constructor.
        spark_events : object used to emit the library-loaded event; a new
            ``SparkEvents`` is created when None. It is not stored on the
            instance here.
    """
    # You must call the parent constructor
    super(SparkMagicBase, self).__init__(shell)

    self.logger = Log("SparkMagics")
    self.ipython_display = IpythonDisplay()
    # The controller is given the same display object created above.
    self.spark_controller = SparkController(self.ipython_display)

    self.logger.debug("Initialized spark magics.")

    events = SparkEvents() if spark_events is None else spark_events
    events.emit_library_loaded_event()
def __init__(self, code, spark_events=None):
    """Store the dedented code and the event emitter for this command.

    Parameters:
        code : source text; common leading whitespace is removed with
            ``textwrap.dedent`` before it is stored.
        spark_events : event emitter to keep on the instance; a new
            ``SparkEvents`` is created when None.
    """
    super(Command, self).__init__()

    self.code = textwrap.dedent(code)
    self.logger = Log("Command")
    self._spark_events = SparkEvents() if spark_events is None else spark_events
def __init__(self, query, samplemethod=None, maxrows=None, samplefraction=None, spark_events=None):
    """Validate and store the query together with its sampling options.

    Parameters:
        query : stored as-is on ``self.query``.
        samplemethod : 'take' or 'sample'; ``conf.default_samplemethod()``
            when None.
        maxrows : an int; ``conf.default_maxrows()`` when None.
        samplefraction : value in [0.0, 1.0];
            ``conf.default_samplefraction()`` when None.
        spark_events : event emitter to keep on the instance; a new
            ``SparkEvents`` is created when None.

    Raises:
        BadUserDataException : when any of the three sampling options fails
            its check.
    """
    super(SQLQuery, self).__init__()

    # Fill in configured defaults, in the same order as the parameters.
    if samplemethod is None:
        samplemethod = conf.default_samplemethod()
    if maxrows is None:
        maxrows = conf.default_maxrows()
    if samplefraction is None:
        samplefraction = conf.default_samplefraction()

    # Validate after defaulting, so configured values are checked too.
    allowed_methods = {'take', 'sample'}
    if samplemethod not in allowed_methods:
        raise BadUserDataException(
            'samplemethod (-m) must be one of (take, sample)')

    if not isinstance(maxrows, int):
        raise BadUserDataException('maxrows (-n) must be an integer')

    fraction_in_range = 0.0 <= samplefraction <= 1.0
    if not fraction_in_range:
        raise BadUserDataException(
            'samplefraction (-r) must be a float between 0.0 and 1.0')

    self.query = query
    self.samplemethod = samplemethod
    self.maxrows = maxrows
    self.samplefraction = samplefraction

    self._spark_events = SparkEvents() if spark_events is None else spark_events
def __init__(self, shell, data=None, spark_events=None):
    """Initialise kernel-magic state on top of the parent constructor.

    Parameters:
        shell : forwarded to the parent constructor.
        data : forwarded to the parent constructor.
        spark_events : event emitter to keep on the instance; a new
            ``SparkEvents`` is created when None.
    """
    # You must call the parent constructor
    # NOTE(review): spark_events is not forwarded to the parent here --
    # confirm whether the parent should receive the injected emitter.
    super(KernelMagics, self).__init__(shell, data)

    self.session_name = "session_name"
    self.session_started = False

    # In order to set these following 3 properties, call %%_do_not_call_change_language -l language
    self.language = ""
    self.endpoint = None

    # Fatal-error state starts out cleared.
    self.fatal_error = False
    self.fatal_error_message = ""

    self._spark_events = SparkEvents() if spark_events is None else spark_events
def __init__(self, http_client, properties, ipython_display,
             session_id=-1, sql_created=None, spark_events=None):
    """Validate the session description and initialise local session state.

    Parameters:
        http_client : stored on ``self._http_client``.
        properties : mapping that must contain a "kind" entry; stored as-is.
        ipython_display : stored on ``self.ipython_display``.
        session_id : -1 means no session id is known yet.
        sql_created : stored as ``created_sql_context``; forced to False
            when ``session_id`` is -1.
        spark_events : event emitter to keep on the instance; a new
            ``SparkEvents`` is created when None.

    Raises:
        AssertionError : when "kind" is missing from ``properties`` or a
            configured sleep/timeout value is not greater than 0.
        BadUserDataException : when ``sql_created`` is True without a
            session id, or the lower-cased kind is not in
            ``constants.SESSION_KINDS_SUPPORTED``.
    """
    super(LivySession, self).__init__()

    assert "kind" in list(properties.keys())
    requested_kind = properties["kind"]

    self.properties = properties
    self.ipython_display = ipython_display
    self._spark_events = SparkEvents() if spark_events is None else spark_events

    # Read all three timing values from configuration before checking any.
    status_interval = conf.status_sleep_seconds()
    statement_interval = conf.statement_sleep_seconds()
    idle_timeout = conf.wait_for_idle_timeout_seconds()
    for seconds in (status_interval, statement_interval, idle_timeout):
        assert seconds > 0

    has_no_id = session_id == -1
    if has_no_id and sql_created is True:
        raise BadUserDataException("Cannot indicate sql state without session id.")

    self.logger = Log("LivySession")

    # The kind is compared and stored in lower case.
    normalized_kind = requested_kind.lower()
    if normalized_kind not in constants.SESSION_KINDS_SUPPORTED:
        supported_kinds = ", ".join(constants.SESSION_KINDS_SUPPORTED)
        raise BadUserDataException(
            "Session of kind '{}' not supported. Session must be of kinds {}."
            .format(normalized_kind, supported_kinds))

    if has_no_id:
        self.status = constants.NOT_STARTED_SESSION_STATUS
        sql_created = False
    else:
        self.status = constants.BUSY_SESSION_STATUS

    self._logs = ""
    self._http_client = http_client
    self._status_sleep_seconds = status_interval
    self._statement_sleep_seconds = statement_interval
    self._wait_for_idle_timeout_seconds = idle_timeout

    self.kind = normalized_kind
    self.id = session_id
    self.created_sql_context = sql_created
def __init__(self, http_client, properties, ipython_display,
             session_id=-1, sql_created=None, spark_events=None):
    """Validate the session description and initialise local session state.

    Parameters:
        http_client : stored on ``self._http_client``.
        properties : mapping that must contain a "kind" entry; stored as-is.
        ipython_display : stored on ``self.ipython_display``.
        session_id : -1 means no session id is known yet.
        sql_created : stored as ``created_sql_context``; forced to False
            when ``session_id`` is -1.
        spark_events : optional event emitter to keep on the instance; a new
            ``SparkEvents`` is created when None, so existing callers are
            unaffected.

    Raises:
        AssertionError : when "kind" is missing from ``properties`` or a
            configured sleep/timeout value is not greater than 0.
        ValueError : when ``sql_created`` is True without a session id, or
            the lower-cased kind is not in
            ``constants.SESSION_KINDS_SUPPORTED``.
    """
    super(LivySession, self).__init__()

    assert "kind" in list(properties.keys())
    kind = properties["kind"]

    self.properties = properties
    self.ipython_display = ipython_display

    # Allow the emitter to be injected instead of always building a new one.
    if spark_events is None:
        spark_events = SparkEvents()
    self._spark_events = spark_events

    status_sleep_seconds = conf.status_sleep_seconds()
    statement_sleep_seconds = conf.statement_sleep_seconds()
    wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

    assert status_sleep_seconds > 0
    assert statement_sleep_seconds > 0
    assert wait_for_idle_timeout_seconds > 0

    if session_id == -1 and sql_created is True:
        raise ValueError("Cannot indicate sql state without session id.")

    self.logger = Log("LivySession")

    # The kind is compared and stored in lower case.
    kind = kind.lower()
    if kind not in constants.SESSION_KINDS_SUPPORTED:
        raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                         .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

    if session_id == -1:
        self.status = constants.NOT_STARTED_SESSION_STATUS
        sql_created = False
    else:
        self.status = constants.BUSY_SESSION_STATUS

    self._logs = ""
    self._http_client = http_client
    self._status_sleep_seconds = status_sleep_seconds
    self._statement_sleep_seconds = statement_sleep_seconds
    self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds

    self.kind = kind
    self.id = session_id
    self.created_sql_context = sql_created
class LivySession(ObjectWithGuid):
    """Session that is livy specific."""

    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None):
        """Validate the session description and initialise local state.

        Parameters:
            http_client : client used for every call to the server.
            properties : mapping that must contain a "kind" entry; it is
                also the payload posted by ``start``.
            ipython_display : used to write progress messages.
            session_id : -1 means no session id is known yet.
            sql_created : stored as ``created_sql_context``; forced to False
                when ``session_id`` is -1.
            spark_events : optional event emitter to keep on the instance; a
                new ``SparkEvents`` is created when None, so existing
                callers are unaffected.

        Raises:
            AssertionError : when "kind" is missing from ``properties`` or a
                configured sleep/timeout value is not greater than 0.
            ValueError : when ``sql_created`` is True without a session id,
                or the lower-cased kind is not in
                ``constants.SESSION_KINDS_SUPPORTED``.
        """
        super(LivySession, self).__init__()

        assert "kind" in list(properties.keys())
        kind = properties["kind"]

        self.properties = properties
        self.ipython_display = ipython_display

        # Allow the emitter to be injected instead of always building one.
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0

        if session_id == -1 and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        # The kind is compared and stored in lower case.
        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created

    def __str__(self):
        return "Session id: {}\tKind: {}\tState: {}".format(self.id, self.kind, self.status)

    def start(self, create_sql_context=True):
        """Start the session against actual livy server.

        Emits a creation-start event, posts ``self.properties``, records the
        returned id and state, waits for the session to become idle and,
        unless ``create_sql_context`` is false, creates the sqlContext. The
        creation-end event is emitted only when all of that succeeds.

        Raises:
            LivyClientTimeoutError : when the session does not become idle
                within ``conf.livy_session_startup_timeout_seconds()``.
        """
        self._spark_events.emit_session_creation_start_event(self.guid, self.kind)

        r = self._http_client.post_session(self.properties)
        self.id = r["id"]
        self.status = str(r["state"])

        self.ipython_display.writeln("Creating SparkContext as 'sc'")

        # We wait for livy_session_startup_timeout_seconds() for the session to start up.
        try:
            self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
        except LivyClientTimeoutError:
            # Re-raise with a message specific to session start-up.
            raise LivyClientTimeoutError(
                "Session {} did not start up in {} seconds."
                .format(self.id, conf.livy_session_startup_timeout_seconds()))

        if create_sql_context:
            self.create_sql_context()

        self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status)

    def create_sql_context(self):
        """Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'.

        Does nothing when ``created_sql_context`` is already truthy.

        Raises:
            LivyClientTimeoutError : when executing the creation command
                times out.
        """
        if self.created_sql_context:
            return

        self.logger.debug("Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln("Creating HiveContext as 'sqlContext'")

        command = self._get_sql_context_creation_command()
        try:
            command.execute(self)
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError(
                "Failed to create the SqlContext in time. Timed out after {} seconds."
                .format(self._wait_for_idle_timeout_seconds))

        self.created_sql_context = True

    def get_logs(self):
        """Fetch all session logs, cache them joined by newlines, and return them."""
        log_array = self._http_client.get_all_session_logs(self.id)['log']
        self._logs = "\n".join(log_array)
        return self._logs

    @property
    def http_client(self):
        return self._http_client

    @staticmethod
    def is_final_status(status):
        """Return whether ``status`` is one of ``constants.FINAL_STATUS``."""
        return status in constants.FINAL_STATUS

    def delete(self):
        """Delete the session on the server and mark it dead locally.

        Raises:
            ValueError : when the session is in the not-started or dead
                state; no request is sent in that case.
        """
        self.logger.debug("Deleting session '{}'".format(self.id))

        if self.status != constants.NOT_STARTED_SESSION_STATUS and self.status != constants.DEAD_SESSION_STATUS:
            self._http_client.delete_session(self.id)
            self.status = constants.DEAD_SESSION_STATUS
            self.id = -1
        else:
            raise ValueError("Cannot delete session {} that is in state '{}'."
                             .format(self.id, self.status))

    def wait_for_idle(self, seconds_to_wait=None):
        """Wait for session to go to idle status. Sleep meanwhile.

        The status is refreshed, then the session sleeps for the configured
        status sleep interval, until the session is idle.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up;
                the configured wait-for-idle timeout is used when None.

        Raises:
            LivyUnexpectedStatusError : when the session reaches a status in
                ``constants.FINAL_STATUS``.
            LivyClientTimeoutError : when the time budget is used up before
                the session becomes idle.
        """
        if seconds_to_wait is None:
            seconds_to_wait = self._wait_for_idle_timeout_seconds

        while True:
            self._refresh_status()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                error = ("Session {} unexpectedly reached final status '{}'. See logs:\n{}"
                         .format(self.id, self.status, self.get_logs()))
                self.logger.error(error)
                raise LivyUnexpectedStatusError(error)

            if seconds_to_wait <= 0.0:
                error = ("Session {} did not reach idle status in time. Current status is {}."
                         .format(self.id, self.status))
                self.logger.error(error)
                raise LivyClientTimeoutError(error)

            # Only the time spent from here to the end of the sleep is
            # subtracted from the remaining budget.
            start_time = time()
            self.logger.debug("Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            seconds_to_wait -= time() - start_time

    def sleep(self):
        """Sleep for the configured statement sleep interval."""
        # Inside the method body ``sleep`` resolves to the module-level
        # function, not to this method.
        sleep(self._statement_sleep_seconds)

    def _refresh_status(self):
        """Fetch the session state from the server, store it, and return it.

        Raises:
            ValueError : when the reported state is not in
                ``constants.POSSIBLE_SESSION_STATUS``.
        """
        status = self._http_client.get_session(self.id)['state']

        if status in constants.POSSIBLE_SESSION_STATUS:
            self.status = status
        else:
            raise ValueError("Status '{}' not supported by session.".format(status))

        return self.status

    def _get_sql_context_creation_command(self):
        """Build the ``Command`` that creates a HiveContext for this session kind.

        Raises:
            ValueError : when the session kind has no known creation command.
        """
        if self.kind == constants.SESSION_KIND_SPARK:
            sql_context_command = "val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_PYSPARK:
            sql_context_command = "from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_SPARKR:
            sql_context_command = "sqlContext <- sparkRHive.init(sc)"
        else:
            raise ValueError("Do not know how to create HiveContext in session of kind {}.".format(self.kind))

        return Command(sql_context_command)
def __init__(self, df, encoding, renderer=None, ipywidget_factory=None, encoding_widget=None,
             ipython_display=None, nested_widget_mode=False, spark_events=None, testing=False, **kwargs):
    """Build the widget tree for a dataframe and render the first view.

    Parameters:
        df : a ``pd.DataFrame`` (exact type; checked by assertion).
        encoding : must not be None; stored on ``self.encoding``.
        renderer, ipywidget_factory, encoding_widget, ipython_display,
        spark_events : optional collaborators; each gets a default instance
            when None.
        nested_widget_mode : when true, controls and output become children
            of this widget; otherwise they are shown through
            ``ipython_display``.
        testing : when true, the parent constructor is not called.
        **kwargs : forwarded to the parent constructor with 'orientation'
            forced to 'vertical'.

    Raises:
        AssertionError : when ``encoding`` or ``df`` is None, or ``df`` is
            not exactly a ``pd.DataFrame``.
    """
    assert encoding is not None
    assert df is not None
    assert type(df) is pd.DataFrame

    kwargs['orientation'] = 'vertical'

    if not testing:
        super(AutoVizWidget, self).__init__((), **kwargs)

    self.df = self._convert_to_displayable_dataframe(df)

    # Collaborators: fall back to a default instance when none is supplied.
    self.renderer = GraphRenderer() if renderer is None else renderer
    self.ipywidget_factory = IpyWidgetFactory() if ipywidget_factory is None else ipywidget_factory
    # The default encoding widget is built from the converted dataframe.
    if encoding_widget is None:
        encoding_widget = EncodingWidget(self.df, encoding, self.on_render_viz)
    self.encoding_widget = encoding_widget
    self.ipython_display = IpythonDisplay() if ipython_display is None else ipython_display

    self.encoding = encoding

    # Widget that will become the only child of AutoVizWidget
    self.widget = self.ipywidget_factory.get_vbox()

    # Create output area
    self.to_display = self.ipywidget_factory.get_output()
    self.to_display.width = "800px"
    self.output = self.ipywidget_factory.get_hbox()
    self.output.children = [self.to_display]

    self.controls = self._create_controls_widget()

    self._spark_events = SparkEvents() if spark_events is None else spark_events

    if nested_widget_mode:
        self.widget.children = [self.controls, self.output]
        self.children = [self.widget]
    else:
        self.ipython_display.display(self.controls)
        self.ipython_display.display(self.to_display)

    self.on_render_viz()