def _setup():
    """Build the module-level fixtures shared by the tests.

    Rebinds these globals:

    * ``spark_events`` -- a fresh ``SparkEvents`` whose ``handler`` and
      ``get_utc_date_time`` attributes are replaced by ``MagicMock`` objects.
    * ``time_stamp`` -- the value returned by the mocked
      ``get_utc_date_time``.
    * ``guid`` -- a new identifier from ``utils.generate_uuid()``.
    """
    global spark_events, guid, time_stamp

    spark_events = SparkEvents()
    spark_events.handler = MagicMock()
    # Patch the instance, not the SparkEvents class: a class-level assignment
    # is never undone, so the mock would leak into every SparkEvents object
    # created afterwards in the same test process.
    spark_events.get_utc_date_time = MagicMock()
    time_stamp = spark_events.get_utc_date_time()
    guid = utils.generate_uuid()
def _setup():
    """Reset the module-level fixtures used by the tests.

    Rebinds the globals ``spark_events`` (a ``SparkEvents`` whose ``handler``,
    ``_get_utc_date_time`` and ``_verify_language_ok`` attributes are mocks),
    ``time_stamp`` (the value returned by the mocked ``_get_utc_date_time``)
    and ``guid1``..``guid3`` (three identifiers from
    ``utils.generate_uuid()``).
    """
    global spark_events, guid1, guid2, guid3, time_stamp

    spark_events = events = SparkEvents()

    # Replace the collaborators on this instance with mocks.
    events.handler = MagicMock()
    events._get_utc_date_time = MagicMock()
    events._verify_language_ok = MagicMock()

    time_stamp = events._get_utc_date_time()
    guid1, guid2, guid3 = [utils.generate_uuid() for _ in range(3)]
Exemple #3
0
    def __init__(self, shell, data=None, spark_events=None):
        """Create the logger, display and Spark controller for the magics.

        Parameters:
            shell : forwarded to the parent constructor.
            data : accepted but not used by this constructor.
            spark_events : object used to emit the library-loaded event; a new
                SparkEvents is created when None. It is not stored on self.
        """
        # The parent constructor has to be called before anything else.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        events = SparkEvents() if spark_events is None else spark_events
        events.emit_library_loaded_event()
Exemple #4
0
 def __init__(self, code, spark_events=None):
     """Store the dedented code and the event emitter for this command.

     Parameters:
         code : source text; common leading whitespace is removed with
             textwrap.dedent before it is stored on ``self.code``.
         spark_events : stored on ``self._spark_events``; a new SparkEvents
             is created when None.
     """
     super(Command, self).__init__()
     self.code = textwrap.dedent(code)
     self.logger = Log("Command")
     self._spark_events = SparkEvents() if spark_events is None else spark_events
Exemple #5
0
    def __init__(self,
                 query,
                 samplemethod=None,
                 maxrows=None,
                 samplefraction=None,
                 spark_events=None):
        """Validate and store the query together with its sampling options.

        Parameters:
            query : stored unchanged on ``self.query``.
            samplemethod : 'take' or 'sample'; conf.default_samplemethod() is
                used when None.
            maxrows : an integer; conf.default_maxrows() is used when None.
            samplefraction : a number between 0.0 and 1.0 inclusive;
                conf.default_samplefraction() is used when None.
            spark_events : stored on ``self._spark_events``; a new SparkEvents
                is created when None.

        Raises:
            BadUserDataException : if samplemethod, maxrows or samplefraction
                is not valid.
        """
        super(SQLQuery, self).__init__()
        if samplemethod is None:
            samplemethod = conf.default_samplemethod()
        if maxrows is None:
            maxrows = conf.default_maxrows()
        if samplefraction is None:
            samplefraction = conf.default_samplefraction()

        # A tuple (not a set) so that an unhashable value is reported as bad
        # user data instead of escaping as a TypeError from hashing.
        if samplemethod not in ('take', 'sample'):
            raise BadUserDataException(
                'samplemethod (-m) must be one of (take, sample)')
        if not isinstance(maxrows, int):
            raise BadUserDataException('maxrows (-n) must be an integer')
        try:
            fraction_in_range = 0.0 <= samplefraction <= 1.0
        except TypeError:
            # On Python 3 a non-numeric value cannot be ordered against a
            # float; treat that as invalid input rather than crashing.
            fraction_in_range = False
        if not fraction_in_range:
            raise BadUserDataException(
                'samplefraction (-r) must be a float between 0.0 and 1.0')

        self.query = query
        self.samplemethod = samplemethod
        self.maxrows = maxrows
        self.samplefraction = samplefraction
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events
Exemple #6
0
    def __init__(self, shell, data=None, spark_events=None):
        """Initialise the kernel magics with an empty, not-yet-started state.

        Parameters:
            shell : forwarded to the parent constructor.
            data : forwarded to the parent constructor.
            spark_events : stored on ``self._spark_events``; a new SparkEvents
                is created when None.
        """
        # The parent constructor has to be called before anything else.
        super(KernelMagics, self).__init__(shell, data)

        self.session_name = "session_name"
        self.session_started = False

        # These properties are meant to be set by calling
        # %%_do_not_call_change_language -l language
        # NOTE(review): the original comment spoke of "3 properties" although
        # four attributes follow -- confirm which ones that magic sets.
        self.language, self.endpoint = "", None
        self.fatal_error, self.fatal_error_message = False, ""

        self._spark_events = SparkEvents() if spark_events is None else spark_events
Exemple #7
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None):
        """Validate the arguments and set up the session's initial state.

        Parameters:
            http_client : stored on ``self._http_client``.
            properties : mapping that must contain a "kind" entry; stored on
                ``self.properties``.
            ipython_display : stored on ``self.ipython_display``.
            session_id : -1 means the session has not been started yet.
            sql_created : stored on ``self.created_sql_context``; forced to
                False when session_id is -1.
            spark_events : stored on ``self._spark_events``; a new SparkEvents
                is created when None.

        Raises:
            AssertionError : if "kind" is missing from properties or one of
                the configured sleep/timeout values is not positive.
            BadUserDataException : if sql_created is True without a session
                id, or the kind is not in constants.SESSION_KINDS_SUPPORTED.
        """
        super(LivySession, self).__init__()
        assert "kind" in properties.keys()
        kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        self._spark_events = SparkEvents() if spark_events is None else spark_events

        # Polling intervals and the idle timeout are read from configuration.
        status_interval = conf.status_sleep_seconds()
        statement_interval = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()

        assert status_interval > 0
        assert statement_interval > 0
        assert idle_timeout > 0

        not_started = session_id == -1
        if not_started and sql_created is True:
            raise BadUserDataException("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        # Kinds are compared in lower case.
        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            supported_kinds = ", ".join(constants.SESSION_KINDS_SUPPORTED)
            raise BadUserDataException("Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, supported_kinds))

        if not_started:
            # A session without an id cannot have a SQL context yet.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_interval
        self._statement_sleep_seconds = statement_interval
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created
Exemple #8
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None):
        """Validate the arguments and set up the session's initial state.

        Parameters:
            http_client : stored on ``self._http_client``.
            properties : mapping that must contain a "kind" entry; stored on
                ``self.properties``.
            ipython_display : stored on ``self.ipython_display``.
            session_id : -1 means the session has not been started yet.
            sql_created : stored on ``self.created_sql_context``; forced to
                False when session_id is -1.

        Raises:
            AssertionError : if "kind" is missing from properties or one of
                the configured sleep/timeout values is not positive.
            ValueError : if sql_created is True without a session id, or the
                kind is not in constants.SESSION_KINDS_SUPPORTED.
        """
        super(LivySession, self).__init__()
        assert "kind" in properties.keys()
        kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._spark_events = SparkEvents()

        # Polling intervals and the idle timeout are read from configuration.
        status_interval = conf.status_sleep_seconds()
        statement_interval = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()

        assert status_interval > 0
        assert statement_interval > 0
        assert idle_timeout > 0

        has_no_id = session_id == -1
        if has_no_id and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        # Kinds are compared in lower case.
        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            supported_kinds = ", ".join(constants.SESSION_KINDS_SUPPORTED)
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, supported_kinds))

        if has_no_id:
            # A session without an id cannot have a SQL context yet.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_interval
        self._statement_sleep_seconds = statement_interval
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created
Exemple #9
0
class LivySession(ObjectWithGuid):
    """A session that is specific to Livy."""

    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None):
        """Validate the arguments and set up the session's initial state.

        Parameters:
            http_client : client used for all calls to the server.
            properties : mapping that must contain a "kind" entry; it is also
                what start() posts to create the session.
            ipython_display : used to write progress messages.
            session_id : -1 means the session has not been started yet.
            sql_created : whether the SQL context already exists; forced to
                False when session_id is -1.

        Raises:
            AssertionError : if "kind" is missing from properties or one of
                the configured sleep/timeout values is not positive.
            ValueError : if sql_created is True without a session id, or the
                kind is not in constants.SESSION_KINDS_SUPPORTED.
        """
        super(LivySession, self).__init__()
        assert "kind" in properties.keys()
        kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._spark_events = SparkEvents()

        # Polling intervals and the idle timeout are read from configuration.
        status_interval = conf.status_sleep_seconds()
        statement_interval = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()

        assert status_interval > 0
        assert statement_interval > 0
        assert idle_timeout > 0

        has_no_id = session_id == -1
        if has_no_id and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        # Kinds are compared in lower case.
        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            supported_kinds = ", ".join(constants.SESSION_KINDS_SUPPORTED)
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, supported_kinds))

        if has_no_id:
            # A session without an id cannot have a SQL context yet.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_interval
        self._statement_sleep_seconds = statement_interval
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created

    def __str__(self):
        """Return a one-line summary with the session id, kind and status."""
        template = "Session id: {}\tKind: {}\tState: {}"
        return template.format(self.id, self.kind, self.status)

    def start(self, create_sql_context=True):
        """Create the session on the livy server and wait until it is idle.

        Emits a creation-start event first and a creation-end event after the
        session is idle (and, if requested, the SQL context was created).

        Parameters:
            create_sql_context : when true, create_sql_context() is called
                once the session is idle.

        Raises:
            LivyClientTimeoutError : if the session is not idle within
                conf.livy_session_startup_timeout_seconds().
        """
        self._spark_events.emit_session_creation_start_event(self.guid, self.kind)

        response = self._http_client.post_session(self.properties)
        self.id = response["id"]
        self.status = str(response["state"])

        self.ipython_display.writeln("Creating SparkContext as 'sc'")
        # Session startup has its own timeout, separate from the idle timeout.
        try:
            self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
        except LivyClientTimeoutError:
            message = "Session {} did not start up in {} seconds."\
                .format(self.id, conf.livy_session_startup_timeout_seconds())
            raise LivyClientTimeoutError(message)

        if create_sql_context:
            self.create_sql_context()
        self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status)

    def create_sql_context(self):
        """Create the 'sqlContext' variable on the session, at most once.

        Raises:
            LivyClientTimeoutError : if executing the creation command times
                out.
        """
        if self.created_sql_context:
            # Already created; nothing to do.
            return

        self.logger.debug("Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln("Creating HiveContext as 'sqlContext'")
        creation_command = self._get_sql_context_creation_command()
        try:
            creation_command.execute(self)
        except LivyClientTimeoutError:
            message = "Failed to create the SqlContext in time. Timed out after {} seconds."\
                .format(self._wait_for_idle_timeout_seconds)
            raise LivyClientTimeoutError(message)
        else:
            self.created_sql_context = True

    def get_logs(self):
        """Fetch all session logs, cache them as one string and return it."""
        response = self._http_client.get_all_session_logs(self.id)
        self._logs = "\n".join(response['log'])
        return self._logs

    @property
    def http_client(self):
        """The http client this session was constructed with."""
        return self._http_client

    @staticmethod
    def is_final_status(status):
        """Return whether status is one of constants.FINAL_STATUS."""
        return status in constants.FINAL_STATUS

    def delete(self):
        """Delete the session on the server and mark it dead locally.

        Raises:
            ValueError : if the session was never started or is already dead.
        """
        self.logger.debug("Deleting session '{}'".format(self.id))

        if self.status == constants.NOT_STARTED_SESSION_STATUS \
                or self.status == constants.DEAD_SESSION_STATUS:
            raise ValueError("Cannot delete session {} that is in state '{}'."
                             .format(self.id, self.status))

        self._http_client.delete_session(self.id)
        self.status = constants.DEAD_SESSION_STATUS
        self.id = -1

    def wait_for_idle(self, seconds_to_wait=None):
        """Poll the session status until it becomes idle.

        The status is refreshed, then the method sleeps for the status sleep
        interval chosen in the constructor and repeats.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up;
                the configured idle timeout is used when None.

        Raises:
            LivyUnexpectedStatusError : if the session reaches a final status.
            LivyClientTimeoutError : if the time budget runs out first.
        """
        remaining = self._wait_for_idle_timeout_seconds if seconds_to_wait is None else seconds_to_wait

        while True:
            self._refresh_status()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                error = "Session {} unexpectedly reached final status '{}'. See logs:\n{}"\
                    .format(self.id, self.status, self.get_logs())
                self.logger.error(error)
                raise LivyUnexpectedStatusError(error)

            if remaining <= 0.0:
                error = "Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyClientTimeoutError(error)

            # Only the time measured from here to the end of the sleep is
            # subtracted from the remaining budget.
            sleep_started = time()
            self.logger.debug("Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            remaining -= time() - sleep_started

    def sleep(self):
        """Sleep for the statement sleep interval chosen in the constructor."""
        sleep(self._statement_sleep_seconds)

    def _refresh_status(self):
        """Fetch the session state from the server, store it and return it.

        Raises:
            ValueError : if the reported state is not in
                constants.POSSIBLE_SESSION_STATUS.
        """
        reported = self._http_client.get_session(self.id)['state']

        if reported not in constants.POSSIBLE_SESSION_STATUS:
            raise ValueError("Status '{}' not supported by session.".format(reported))

        self.status = reported
        return self.status

    def _get_sql_context_creation_command(self):
        """Return the Command that creates a HiveContext for this kind.

        Raises:
            ValueError : if the session kind has no known creation code.
        """
        if self.kind == constants.SESSION_KIND_SPARK:
            return Command("val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)")
        if self.kind == constants.SESSION_KIND_PYSPARK:
            return Command("from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)")
        if self.kind == constants.SESSION_KIND_SPARKR:
            return Command("sqlContext <- sparkRHive.init(sc)")

        raise ValueError("Do not know how to create HiveContext in session of kind {}.".format(self.kind))
Exemple #10
0
    def __init__(self,
                 df,
                 encoding,
                 renderer=None,
                 ipywidget_factory=None,
                 encoding_widget=None,
                 ipython_display=None,
                 nested_widget_mode=False,
                 spark_events=None,
                 testing=False,
                 **kwargs):
        """Build the widget tree for a dataframe and render it once.

        Parameters:
            df : pandas DataFrame (or subclass) to visualise; it is passed
                through _convert_to_displayable_dataframe before being stored.
            encoding : must not be None; stored on ``self.encoding`` and
                handed to the default EncodingWidget.
            renderer : GraphRenderer() is used when None.
            ipywidget_factory : IpyWidgetFactory() is used when None.
            encoding_widget : when None, an EncodingWidget is built from the
                converted dataframe, the encoding and self.on_render_viz.
            ipython_display : IpythonDisplay() is used when None.
            nested_widget_mode : when true, the controls and output become
                children of this widget; otherwise they are displayed through
                ipython_display.
            spark_events : stored on ``self._spark_events``; a new SparkEvents
                is created when None.
            testing : when true, the parent constructor is not called.
            **kwargs : forwarded to the parent constructor with 'orientation'
                forced to 'vertical'.

        Raises:
            AssertionError : if encoding or df is None, or df is not a
                pandas DataFrame.
        """
        assert encoding is not None
        assert df is not None
        # isinstance (rather than an exact type comparison) so that DataFrame
        # subclasses are accepted as well.
        assert isinstance(df, pd.DataFrame)

        kwargs['orientation'] = 'vertical'

        if not testing:
            super(AutoVizWidget, self).__init__((), **kwargs)

        self.df = self._convert_to_displayable_dataframe(df)

        if renderer is None:
            renderer = GraphRenderer()
        self.renderer = renderer

        if ipywidget_factory is None:
            ipywidget_factory = IpyWidgetFactory()
        self.ipywidget_factory = ipywidget_factory

        if encoding_widget is None:
            encoding_widget = EncodingWidget(self.df, encoding,
                                             self.on_render_viz)
        self.encoding_widget = encoding_widget

        if ipython_display is None:
            ipython_display = IpythonDisplay()
        self.ipython_display = ipython_display

        self.encoding = encoding

        # Widget that will become the only child of AutoVizWidget
        self.widget = self.ipywidget_factory.get_vbox()

        # Create output area
        self.to_display = self.ipywidget_factory.get_output()
        self.to_display.width = "800px"
        self.output = self.ipywidget_factory.get_hbox()
        self.output.children = [self.to_display]

        self.controls = self._create_controls_widget()

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        if nested_widget_mode:
            self.widget.children = [self.controls, self.output]
            self.children = [self.widget]
        else:
            self.ipython_display.display(self.controls)
            self.ipython_display.display(self.to_display)

        # Draw the initial visualisation.
        self.on_render_viz()