Exemple #1
0
def test_get_session(mocker, server):
    """get_session returns whatever Session.from_json builds from the mock JSON."""
    # patch.object hands back the replacement mock, so keep a direct handle on it
    from_json = mocker.patch.object(Session, 'from_json')

    fetched = LivyClient(server).get_session(MOCK_SESSION_ID)

    assert fetched == from_json.return_value
    from_json.assert_called_once_with(MOCK_SESSION_JSON)
Exemple #2
0
def test_list_sessions(mocker, server):
    """list_sessions returns a one-element list built via Session.from_json."""
    # patch.object hands back the replacement mock, so keep a direct handle on it
    from_json = mocker.patch.object(Session, 'from_json')

    listed = LivyClient(server).list_sessions()

    expected = [from_json.return_value]
    assert listed == expected
    from_json.assert_called_once_with(MOCK_SESSION_JSON)
Exemple #3
0
def test_create_session(mocker, server):
    """create_session returns whatever Session.from_json builds from the mock JSON."""
    # patch.object hands back the replacement mock, so keep a direct handle on it
    from_json = mocker.patch.object(Session, 'from_json')

    livy = LivyClient(server)
    created = livy.create_session(
        SessionKind.PYSPARK,
        spark_conf=MOCK_SPARK_CONF,
    )

    assert created == from_json.return_value
    from_json.assert_called_once_with(MOCK_SESSION_JSON)
Exemple #4
0
def test_get_statement(mocker, server):
    """get_statement returns whatever Statement.from_json builds.

    from_json is expected to receive the session id followed by the mock
    statement JSON, exactly once.
    """
    # patch.object hands back the replacement mock, so keep a direct handle on it
    from_json = mocker.patch.object(Statement, 'from_json')

    fetched = LivyClient(server).get_statement(
        MOCK_SESSION_ID, MOCK_STATEMENT_ID
    )

    assert fetched == from_json.return_value
    from_json.assert_called_once_with(MOCK_SESSION_ID, MOCK_STATEMENT_JSON)
Exemple #5
0
 def __init__(
         self, url: str, kind: SessionKind = SessionKind.PYSPARK,
         spark_conf: Optional[Dict[str, Any]] = None, echo: bool = True, check: bool = True,
         spark_jars: Optional[List[str]] = None, spark_executor_cores: Optional[int] = None,
         spark_executor_memory: Optional[str] = None,
         spark_proxy_user: Optional[str] = None
 ) -> None:
     """Build a client for ``url`` and record the session settings.

     Only the ``LivyClient`` is constructed here; every other argument is
     stored unchanged on the instance under the same name.

     :param url: passed to ``LivyClient``.
     :param kind: session kind; defaults to ``SessionKind.PYSPARK``.
     :param spark_conf: optional Spark configuration mapping.
     :param echo: stored as ``self.echo``.
     :param check: stored as ``self.check``.
     :param spark_jars: optional list of jars.
     :param spark_executor_cores: optional executor core count.
     :param spark_executor_memory: optional executor memory setting.
     :param spark_proxy_user: optional proxy user.
     """
     self.client = LivyClient(url)
     self.kind = kind
     self.echo = echo
     self.check = check
     # No session id is known at construction time.
     self.session_id: Optional[int] = None
     self.spark_conf = spark_conf
     self.spark_jars = spark_jars
     self.spark_executor_memory = spark_executor_memory
     self.spark_executor_cores = spark_executor_cores
     self.spark_proxy_user = spark_proxy_user
Exemple #6
0
def test_delete_session(mocker, server):
    """Smoke test: delete_session on the mock session id must not raise."""
    LivyClient(server).delete_session(MOCK_SESSION_ID)
Exemple #7
0
class LivySession:
    """Manage one session on a Livy server and execute code in it.

    Wraps a ``LivyClient``: ``start`` creates the session and waits until it
    leaves its start-up states, ``run`` / ``read`` / ``read_sql`` execute
    statements in it, and ``close`` deletes the session and closes the client.
    Instances can be used as context managers (``start`` on entry, ``close``
    on exit).
    """

    def __init__(
            self, url: str, kind: SessionKind = SessionKind.PYSPARK,
            spark_conf: Optional[Dict[str, Any]] = None, echo: bool = True, check: bool = True,
            spark_jars: Optional[List[str]] = None, spark_executor_cores: Optional[int] = None,
            spark_executor_memory: Optional[str] = None,
            spark_proxy_user: Optional[str] = None
    ) -> None:
        """Build a client for ``url`` and record the session settings.

        No session is created until ``start`` is called.

        :param url: passed to ``LivyClient``.
        :param kind: session kind handed to ``create_session`` by ``start``.
        :param spark_conf: optional Spark configuration mapping.
        :param echo: when true, ``run`` prints a statement's text output.
        :param check: when true, ``run`` calls ``raise_for_status`` on the
            statement output.
        :param spark_jars: optional list of jars.
        :param spark_executor_cores: optional executor core count.
        :param spark_executor_memory: optional executor memory setting.
        :param spark_proxy_user: optional proxy user.
        """
        self.client = LivyClient(url)
        self.kind = kind
        self.echo = echo
        self.check = check
        # Filled in by start(); None means no session has been created yet.
        self.session_id: Optional[int] = None
        self.spark_conf = spark_conf
        self.spark_jars = spark_jars
        self.spark_executor_memory = spark_executor_memory
        self.spark_executor_cores = spark_executor_cores
        self.spark_proxy_user = spark_proxy_user

    def __enter__(self) -> 'LivySession':
        """Start the session and return ``self``.

        If start-up fails, the session is closed before the exception
        propagates.
        """
        try:
            self.start()
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so without this
            # the client (and any session already created) would be leaked.
            self.close()
            raise
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session; exceptions from the ``with`` body propagate."""
        self.close()

    def start(self) -> None:
        """Create the session and block until it is no longer starting up.

        Records the new session's id in ``self.session_id`` and then polls
        ``self.state`` until it is neither ``NOT_STARTED`` nor ``STARTING``.

        :raises ValueError: from ``state`` if the session cannot be found
            while polling.
        """
        session = self.client.create_session(
            self.kind, self.spark_conf, self.spark_jars,
            self.spark_executor_cores, self.spark_executor_memory,
            self.spark_proxy_user
        )
        self.session_id = session.session_id

        not_ready = {SessionState.NOT_STARTED, SessionState.STARTING}
        # NOTE(review): presumably yields the listed delays and then 1.0
        # repeatedly - confirm against polling_intervals.
        intervals = polling_intervals([0.1, 0.2, 0.3, 0.5], 1.0)

        # Each evaluation of self.state performs a fresh get_session request.
        while self.state in not_ready:
            time.sleep(next(intervals))

    @property
    def state(self) -> SessionState:
        """Fetch the session from the server and return its current state.

        :raises ValueError: if the session has not been started, or if the
            client returns ``None`` for it.
        """
        if self.session_id is None:
            raise ValueError('session not yet started')
        session = self.client.get_session(self.session_id)
        if session is None:
            raise ValueError('session not found - it may have been shut down')
        return session.state

    def close(self) -> None:
        """Delete the session (if one was started) and close the client.

        The client is closed even when deleting the session raises.
        """
        try:
            if self.session_id is not None:
                self.client.delete_session(self.session_id)
        finally:
            self.client.close()

    def run(self, code: str) -> Output:
        """Execute ``code`` in the session and return its output.

        Prints the output text when ``self.echo`` is set and the text is
        non-empty, and calls ``output.raise_for_status()`` when
        ``self.check`` is set.

        :raises ValueError: if the session has not been started.
        :raises RuntimeError: if the statement produced no output.
        """
        output = self._execute(code)
        if self.echo and output.text:
            print(output.text)
        if self.check:
            output.raise_for_status()
        return output

    def read(self, dataframe_name: str) -> pandas.DataFrame:
        """Fetch the session-side dataframe ``dataframe_name`` as pandas.

        Runs the code produced by ``serialise_dataframe_code`` for this
        session kind and passes the statement's text output to
        ``deserialise_dataframe``.

        :raises ValueError: if the session has not been started.
        :raises RuntimeError: if the statement had no output or no text output.
        """
        code = serialise_dataframe_code(dataframe_name, self.kind)
        output = self._execute(code)
        output.raise_for_status()
        if output.text is None:
            raise RuntimeError('statement had no text output')
        return deserialise_dataframe(output.text)

    def read_sql(self, code: str) -> pandas.DataFrame:
        """Execute ``code`` in a SQL session and return the result as pandas.

        :raises ValueError: if this is not a ``SessionKind.SQL`` session, or
            the session has not been started.
        :raises RuntimeError: if the statement had no output or no JSON output.
        """
        if self.kind != SessionKind.SQL:
            raise ValueError('not a SQL session')
        output = self._execute(code)
        output.raise_for_status()
        if output.json is None:
            raise RuntimeError('statement had no JSON output')
        return dataframe_from_json_output(output.json)

    def _execute(self, code: str) -> Output:
        """Submit ``code`` as a statement and wait for its output.

        Polls the statement until it is neither ``WAITING`` nor ``RUNNING``.
        If it is then ``AVAILABLE`` but still has no output, re-fetches it up
        to 100 more times at 0.5 s intervals.

        :raises ValueError: if the session has not been started.
        :raises RuntimeError: if the statement ends up with no output.
        """
        if self.session_id is None:
            raise ValueError('session not yet started')

        statement = self.client.create_statement(self.session_id, code)

        not_finished = {StatementState.WAITING, StatementState.RUNNING}
        intervals = polling_intervals([0.1, 0.2, 0.3, 0.5], 1.0)

        while statement.state in not_finished:
            time.sleep(next(intervals))
            statement = self.client.get_statement(
                statement.session_id, statement.statement_id
            )

        if statement.output is None and statement.state == StatementState.AVAILABLE:
            # The statement has been processed but its output may not have
            # been written yet, so keep re-fetching for a bounded time.
            for _ in range(100):
                time.sleep(0.5)
                statement = self.client.get_statement(
                    statement.session_id, statement.statement_id
                )
                if statement.output is not None:
                    break

        if statement.output is None:
            raise RuntimeError('statement had no output')

        return statement.output