def test_get_session(mocker, server):
    """Check that ``get_session`` builds its result through ``Session.from_json``.

    ``Session.from_json`` is replaced by a mock; the client must return that
    mock's return value and must have called it exactly once with
    ``MOCK_SESSION_JSON``.
    """
    from_json = mocker.patch.object(Session, 'from_json')

    fetched = LivyClient(server).get_session(MOCK_SESSION_ID)

    assert fetched == from_json.return_value
    from_json.assert_called_once_with(MOCK_SESSION_JSON)
def test_list_sessions(mocker, server):
    """Check that ``list_sessions`` parses each session via ``Session.from_json``.

    With ``Session.from_json`` mocked, the client must return a one-element
    list holding the mock's return value, and the mock must have been called
    exactly once with ``MOCK_SESSION_JSON``.
    """
    from_json = mocker.patch.object(Session, 'from_json')

    listed = LivyClient(server).list_sessions()

    expected = [from_json.return_value]
    assert listed == expected
    from_json.assert_called_once_with(MOCK_SESSION_JSON)
def test_create_session(mocker, server):
    """Check that ``create_session`` returns the parsed session.

    A PySpark session is requested with ``MOCK_SPARK_CONF``; with
    ``Session.from_json`` mocked, the client must return the mock's return
    value after calling it exactly once with ``MOCK_SESSION_JSON``.
    """
    from_json = mocker.patch.object(Session, 'from_json')

    created = LivyClient(server).create_session(
        SessionKind.PYSPARK,
        spark_conf=MOCK_SPARK_CONF,
    )

    assert created == from_json.return_value
    from_json.assert_called_once_with(MOCK_SESSION_JSON)
def test_get_statement(mocker, server):
    """Check that ``get_statement`` builds its result via ``Statement.from_json``.

    With ``Statement.from_json`` mocked, the client must return the mock's
    return value and must have called it exactly once with the session id and
    ``MOCK_STATEMENT_JSON``.
    """
    from_json = mocker.patch.object(Statement, 'from_json')

    fetched = LivyClient(server).get_statement(
        MOCK_SESSION_ID,
        MOCK_STATEMENT_ID,
    )

    assert fetched == from_json.return_value
    from_json.assert_called_once_with(MOCK_SESSION_ID, MOCK_STATEMENT_JSON)
def __init__(
    self,
    url: str,
    kind: SessionKind = SessionKind.PYSPARK,
    spark_conf: Optional[Dict[str, Any]] = None,
    echo: bool = True,
    check: bool = True,
    spark_jars: Optional[List[str]] = None,
    spark_executor_cores: Optional[int] = None,
    spark_executor_memory: Optional[str] = None,
    spark_proxy_user: Optional[str] = None,
) -> None:
    """Record the session configuration and build the client.

    Nothing is created on the server here; ``session_id`` stays ``None``
    until it is assigned elsewhere.

    :param url: Address handed to ``LivyClient``.
    :param kind: Kind of session; defaults to ``SessionKind.PYSPARK``.
    :param spark_conf: Optional Spark configuration mapping, stored as-is.
    :param echo: Stored flag (presumably controls printing of statement
        output - confirm against the methods that read it).
    :param check: Stored flag (presumably controls raising on failed
        statements - confirm against the methods that read it).
    :param spark_jars: Optional list of jars, stored as-is.
    :param spark_executor_cores: Optional executor core count, stored as-is.
    :param spark_executor_memory: Optional executor memory setting, stored
        as-is.
    :param spark_proxy_user: Optional proxy user, stored as-is.
    """
    self.client = LivyClient(url)
    self.kind = kind
    self.echo = echo
    self.check = check
    # No server-side session exists yet, so there is no id to hold.
    self.session_id: Optional[int] = None
    self.spark_conf = spark_conf
    self.spark_jars = spark_jars
    self.spark_executor_memory = spark_executor_memory
    self.spark_executor_cores = spark_executor_cores
    self.spark_proxy_user = spark_proxy_user
def test_delete_session(mocker, server):
    """Check that ``delete_session`` completes without raising.

    No assertions are made beyond the call itself succeeding.
    """
    LivyClient(server).delete_session(MOCK_SESSION_ID)
class LivySession:
    """Drive a single Livy session: create it, run code in it, tear it down.

    Usable as a context manager: entering calls :meth:`start`, leaving calls
    :meth:`close`.
    """

    def __init__(
        self,
        url: str,
        kind: SessionKind = SessionKind.PYSPARK,
        spark_conf: Optional[Dict[str, Any]] = None,
        echo: bool = True,
        check: bool = True,
        spark_jars: Optional[List[str]] = None,
        spark_executor_cores: Optional[int] = None,
        spark_executor_memory: Optional[str] = None,
        spark_proxy_user: Optional[str] = None,
    ) -> None:
        """Record the session configuration and build the client.

        The session itself is not created until :meth:`start` is called.

        :param url: Address handed to ``LivyClient``.
        :param kind: Kind of session to create.
        :param spark_conf: Spark configuration forwarded to
            ``create_session``.
        :param echo: When true, :meth:`run` prints any text output.
        :param check: When true, :meth:`run` calls ``raise_for_status`` on
            the statement output.
        :param spark_jars: Forwarded to ``create_session``.
        :param spark_executor_cores: Forwarded to ``create_session``.
        :param spark_executor_memory: Forwarded to ``create_session``.
        :param spark_proxy_user: Forwarded to ``create_session``.
        """
        self.client = LivyClient(url)
        self.kind = kind
        self.echo = echo
        self.check = check
        # Assigned by start(); None means "not yet started".
        self.session_id: Optional[int] = None
        self.spark_conf = spark_conf
        self.spark_jars = spark_jars
        self.spark_executor_memory = spark_executor_memory
        self.spark_executor_cores = spark_executor_cores
        self.spark_proxy_user = spark_proxy_user

    def __enter__(self) -> 'LivySession':
        """Start the session and return this object."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session; exceptions from the ``with`` body propagate."""
        self.close()

    def start(self) -> None:
        """Create the session and block until it has left its start-up states.

        Polls :attr:`state` while it is ``NOT_STARTED`` or ``STARTING``,
        sleeping between polls for the intervals yielded by
        ``polling_intervals``.
        """
        session = self.client.create_session(
            self.kind,
            self.spark_conf,
            self.spark_jars,
            self.spark_executor_cores,
            self.spark_executor_memory,
            self.spark_proxy_user,
        )
        self.session_id = session.session_id

        not_ready = {SessionState.NOT_STARTED, SessionState.STARTING}
        intervals = polling_intervals([0.1, 0.2, 0.3, 0.5], 1.0)

        while self.state in not_ready:
            time.sleep(next(intervals))

    @property
    def state(self) -> SessionState:
        """Fetch the session's current state from the server.

        :raises ValueError: If the session has not been started, or if the
            client returns no session for the stored id.
        """
        if self.session_id is None:
            raise ValueError('session not yet started')
        session = self.client.get_session(self.session_id)
        if session is None:
            raise ValueError('session not found - it may have been shut down')
        return session.state

    def close(self) -> None:
        """Delete the session (if one was started) and close the client.

        The client is closed even when deleting the session raises, so a
        failed delete does not leak the client; the error still propagates.
        """
        try:
            if self.session_id is not None:
                self.client.delete_session(self.session_id)
        finally:
            self.client.close()

    def run(self, code: str) -> Output:
        """Execute ``code`` in the session and return its output.

        Prints the text output when ``echo`` is set and the text is
        non-empty; calls ``raise_for_status`` on the output when ``check``
        is set.
        """
        output = self._execute(code)
        if self.echo and output.text:
            print(output.text)
        if self.check:
            output.raise_for_status()
        return output

    def read(self, dataframe_name: str) -> pandas.DataFrame:
        """Fetch the named dataframe from the session as a pandas DataFrame.

        Runs the code produced by ``serialise_dataframe_code`` and passes the
        resulting text output to ``deserialise_dataframe``.

        :raises RuntimeError: If the statement produced no text output.
        """
        code = serialise_dataframe_code(dataframe_name, self.kind)
        output = self._execute(code)
        output.raise_for_status()
        if output.text is None:
            raise RuntimeError('statement had no text output')
        return deserialise_dataframe(output.text)

    def read_sql(self, code: str) -> pandas.DataFrame:
        """Run ``code`` in a SQL session and return the result as a DataFrame.

        :raises ValueError: If this is not a ``SessionKind.SQL`` session.
        :raises RuntimeError: If the statement produced no JSON output.
        """
        if self.kind != SessionKind.SQL:
            raise ValueError('not a SQL session')
        output = self._execute(code)
        output.raise_for_status()
        if output.json is None:
            raise RuntimeError('statement had no JSON output')
        return dataframe_from_json_output(output.json)

    def _execute(self, code: str) -> Output:
        """Submit ``code`` as a statement and wait for its output.

        :raises ValueError: If the session has not been started.
        :raises RuntimeError: If the statement ends up with no output.
        """
        if self.session_id is None:
            raise ValueError('session not yet started')

        statement = self.client.create_statement(self.session_id, code)

        not_finished = {StatementState.WAITING, StatementState.RUNNING}
        intervals = polling_intervals([0.1, 0.2, 0.3, 0.5], 1.0)

        while statement.state in not_finished:
            time.sleep(next(intervals))
            statement = self.client.get_statement(
                statement.session_id, statement.statement_id
            )

        if (
            statement.output is None
            and statement.state == StatementState.AVAILABLE
        ):
            # The statement can be reported as processed before its output
            # has been written, so keep re-fetching for a bounded time.
            output_retries = 100
            output_retry_delay = 0.5
            for _ in range(output_retries):
                time.sleep(output_retry_delay)
                statement = self.client.get_statement(
                    statement.session_id, statement.statement_id
                )
                if statement.output is not None:
                    break

        if statement.output is None:
            raise RuntimeError('statement had no output')

        return statement.output