Ejemplo n.º 1
0
def test_session_from_json():
    """Session.from_json should turn a session JSON payload into a Session."""
    # Arrange: a payload carrying only id, kind and state.
    payload = {'id': 5, 'kind': 'pyspark', 'state': 'idle'}

    # Act
    parsed = Session.from_json(payload)

    # Assert: string values are expected to map onto the enum members.
    assert parsed == Session(5, SessionKind.PYSPARK, SessionState.IDLE)
Ejemplo n.º 2
0
def test_session_from_json():
    """Session.from_json should turn a session JSON payload into a Session.

    The payload includes a ``proxyUser`` entry in addition to id, kind and
    state, and the parsed result is compared against a hand-built Session.
    """
    session_json = {
        "id": 5,
        # Must agree with the "user" value passed to the expected Session
        # below (presumably its proxy-user argument); a masked placeholder
        # here would make the comparison fail.
        "proxyUser": "user",
        "kind": "pyspark",
        "state": "idle",
    }

    expected = Session(5, "user", SessionKind.PYSPARK, SessionState.IDLE)

    assert Session.from_json(session_json) == expected
Ejemplo n.º 3
0
    def create_session(
        self,
        kind: SessionKind,
        proxy_user: str = None,
        spark_conf: Dict[str, Any] = None,
    ) -> Session:
        """Start a new session on the Livy server.

        :param kind: The kind of session to create.
        :param proxy_user: User to impersonate when starting the session.
        :param spark_conf: Spark configuration properties.
        :raises ValueError: If ``kind`` is not one of the kinds accepted for
            this server version.
        """
        # Legacy servers accept a different set of session kinds.
        allowed_kinds = (
            VALID_LEGACY_SESSION_KINDS
            if self.legacy_server()
            else VALID_SESSION_KINDS
        )
        if kind not in allowed_kinds:
            raise ValueError(
                f"{kind} is not a valid session kind for a Livy server of "
                f"this version (should be one of {allowed_kinds})")

        payload = {"kind": kind.value}
        # Optional fields are only sent when the caller supplied them.
        optional_fields = {"proxyUser": proxy_user, "conf": spark_conf}
        for key, value in optional_fields.items():
            if value is not None:
                payload[key] = value

        response = self._client.post("/sessions", data=payload)
        return Session.from_json(response)
Ejemplo n.º 4
0
 def get_session(self, session_id: int) -> Optional[Session]:
     """Fetch a single session by its ID.

     :param session_id: The ID of the session.
     :return: The parsed session, or ``None`` when the server answers 404.
     """
     try:
         payload = self._client.get(f"/sessions/{session_id}")
     except requests.HTTPError as error:
         # Only "not found" is mapped to None; other HTTP errors propagate.
         if error.response.status_code != 404:
             raise
         return None
     return Session.from_json(payload)
Ejemplo n.º 5
0
    def get_session(self, session_id: int) -> Optional[Session]:
        """Look up a session by ID.

        :param session_id: The ID of the session.
        :return: The session parsed from the response, or ``None`` if the
            request fails with HTTP status 404.
        """
        try:
            response = self._client.get(f"/sessions/{session_id}")
        except requests.HTTPError as http_error:
            is_missing = http_error.response.status_code == 404
            if not is_missing:
                # Anything other than "not found" is passed on to the caller.
                raise
            return None
        else:
            return Session.from_json(response)
Ejemplo n.º 6
0
    def create_session(self, kind: SessionKind) -> Session:
        """Create a new Livy session of the given kind.

        :param kind: The kind of session to create.
        :raises ValueError: If ``kind`` is not one of the kinds accepted for
            this server version.
        """
        # Legacy servers accept a different set of session kinds.
        allowed_kinds = (
            VALID_LEGACY_SESSION_KINDS
            if self.legacy_server()
            else VALID_SESSION_KINDS
        )
        if kind not in allowed_kinds:
            raise ValueError(
                f'{kind} is not a valid session kind for a Livy server of '
                f'this version (should be one of {allowed_kinds})')

        body = {'kind': kind.value}
        if self.auth is not None:
            # A proxyUser value has to be sent when auth is configured, see
            # https://issues.apache.org/jira/browse/KNOX-1098
            body['proxyUser'] = '******'

        response = self._client.post('/sessions', data=body)
        return Session.from_json(response)
Ejemplo n.º 7
0
    def create_session(
        self,
        kind: SessionKind,
        spark_conf: Dict[str, Any] = None,
    ) -> Session:
        """Create a new Livy session, optionally with Spark configuration.

        :param kind: The kind of session to create.
        :param spark_conf: Spark configuration properties, sent as ``conf``
            when provided.
        :raises ValueError: If ``kind`` is not one of the kinds accepted for
            this server version.
        """
        if not self.legacy_server():
            supported_kinds = VALID_SESSION_KINDS
        else:
            supported_kinds = VALID_LEGACY_SESSION_KINDS

        if kind not in supported_kinds:
            raise ValueError(
                f'{kind} is not a valid session kind for a Livy server of '
                f'this version (should be one of {supported_kinds})')

        request_body = {'kind': kind.value}
        extras = {
            # A proxyUser value has to be sent when auth is configured, see
            # https://issues.apache.org/jira/browse/KNOX-1098
            'proxyUser': None if self.auth is None else '******',
            'conf': spark_conf,
        }
        # Only forward the extras that actually carry a value.
        request_body.update(
            {key: value for key, value in extras.items() if value is not None}
        )

        response = self._client.post('/sessions', data=request_body)
        return Session.from_json(response)
Ejemplo n.º 8
0
    def create_session(
        self,
        kind: SessionKind,
        proxy_user: str = None,
        spark_conf: Dict[str, Any] = None,
    ) -> Session:
        """Create a session by posting to ``/sessions``.

        :param kind: The kind of session to create; its ``value`` is sent as
            ``kind``.
        :param proxy_user: Sent as ``proxyUser`` when not ``None``.
        :param spark_conf: Sent as ``conf`` when not ``None``.
        :raises ValueError: If ``kind`` is not one of the kinds accepted for
            this server version.
        """
        if not self.legacy_server():
            accepted_kinds = VALID_SESSION_KINDS
        else:
            accepted_kinds = VALID_LEGACY_SESSION_KINDS

        if kind not in accepted_kinds:
            raise ValueError(
                f"{kind} is not a valid session kind for a Livy server of "
                f"this version (should be one of {accepted_kinds})")

        # Optional entries are spliced in only when the caller provided them.
        request_body = {
            "kind": kind.value,
            **({} if proxy_user is None else {"proxyUser": proxy_user}),
            **({} if spark_conf is None else {"conf": spark_conf}),
        }

        response = self._client.post("/sessions", data=request_body)
        return Session.from_json(response)
Ejemplo n.º 9
0
    def create_session(
        self,
        kind: SessionKind,
        proxy_user: str = None,
        jars: List[str] = None,
        py_files: List[str] = None,
        files: List[str] = None,
        driver_memory: str = None,
        driver_cores: int = None,
        executor_memory: str = None,
        executor_cores: int = None,
        num_executors: int = None,
        archives: List[str] = None,
        queue: str = None,
        name: str = None,
        spark_conf: Dict[str, Any] = None,
        heartbeat_timeout: int = None,
    ) -> Session:
        """Create a new session in Livy.

        ``jars``, ``py_files``, ``files`` and ``archives`` each take a list
        of URLs (for example ``["s3://bucket/object",
        "hdfs://path/to/file"]``) that the Spark driver process must be able
        to reach.  A URL without a scheme is treated as relative to the
        default file system configured in the Livy server.  All of these
        resources are copied to the same working directory on the Spark
        cluster.

        Entries of ``py_files`` are additionally copied to a temporary
        staging area and placed on Python's ``sys.path`` ahead of the
        standard library paths, which makes .py, .zip and .egg files
        importable.

        ``driver_memory`` and ``executor_memory`` follow the JVM memory
        string format: a number followed by a size unit suffix ("k", "m",
        "g" or "t"), e.g. ``512m`` or ``2g``.

        Spark configuration properties are described at
        https://spark.apache.org/docs/latest/configuration.html.

        :param kind: The kind of session to create.
        :param proxy_user: User to impersonate when starting the session.
        :param jars: URLs of jars to be used in this session.
        :param py_files: URLs of Python files to be used in this session.
        :param files: URLs of files to be used in this session.
        :param driver_memory: Amount of memory to use for the driver process
            (e.g. '512m').
        :param driver_cores: Number of cores to use for the driver process.
        :param executor_memory: Amount of memory to use per executor process
            (e.g. '512m').
        :param executor_cores: Number of cores to use for each executor.
        :param num_executors: Number of executors to launch for this session.
        :param archives: URLs of archives to be used in this session.
        :param queue: The name of the YARN queue to submit the session to.
        :param name: The name of this session.
        :param spark_conf: Spark configuration properties.
        :param heartbeat_timeout: Optional timeout in seconds after which the
            session is automatically orphaned if no heartbeat is received.
        :raises ValueError: If ``kind`` is not one of the kinds accepted for
            this server version.
        """
        # Legacy servers accept a different set of session kinds.
        allowed_kinds = (
            VALID_LEGACY_SESSION_KINDS
            if self.legacy_server()
            else VALID_SESSION_KINDS
        )
        if kind not in allowed_kinds:
            raise ValueError(
                f"{kind} is not a valid session kind for a Livy server of "
                f"this version (should be one of {allowed_kinds})")

        # Start with the parameters specific to interactive sessions ...
        body: Dict[str, Any] = {"kind": kind.value}
        if heartbeat_timeout is not None:
            body["heartbeatTimeoutInSecond"] = heartbeat_timeout

        # ... then layer the parameters built by _new_session_body on top.
        body.update(
            _new_session_body(
                proxy_user,
                jars,
                py_files,
                files,
                driver_memory,
                driver_cores,
                executor_memory,
                executor_cores,
                num_executors,
                archives,
                queue,
                name,
                spark_conf,
            )
        )

        response = self._client.post("/sessions", data=body)
        return Session.from_json(response)
Ejemplo n.º 10
0
 def list_sessions(self) -> List[Session]:
     """Return all the active sessions in Livy as Session objects."""
     response = self._client.get("/sessions")
     raw_sessions = response["sessions"]
     return [Session.from_json(raw) for raw in raw_sessions]
Ejemplo n.º 11
0
 def list_sessions(self) -> List[Session]:
     """Fetch ``/sessions`` and parse each listed entry into a Session.

     :return: One Session per item under the response's ``"sessions"`` key.
     """
     payload = self._client.get("/sessions")
     return list(map(Session.from_json, payload["sessions"]))
Ejemplo n.º 12
0
    def create_session(
        self,
        kind: SessionKind,
        proxy_user: str = None,
        jars: List[str] = None,
        py_files: List[str] = None,
        files: List[str] = None,
        driver_memory: str = None,
        driver_cores: int = None,
        executor_memory: str = None,
        executor_cores: int = None,
        num_executors: int = None,
        archives: List[str] = None,
        queue: str = None,
        name: str = None,
        spark_conf: Dict[str, Any] = None,
        logger: Any = None,
    ) -> Session:
        """Create a new session in Livy.

        ``jars``, ``py_files``, ``files`` and ``archives`` each take a list
        of URLs (for example ``["s3://bucket/object",
        "hdfs://path/to/file"]``) that the Spark driver process must be able
        to reach.  A URL without a scheme is treated as relative to the
        default file system configured in the Livy server.  All of these
        resources are copied to the same working directory on the Spark
        cluster.

        Entries of ``py_files`` are additionally copied to a temporary
        staging area and placed on Python's ``sys.path`` ahead of the
        standard library paths, which makes .py, .zip and .egg files
        importable.

        ``driver_memory`` and ``executor_memory`` follow the JVM memory
        string format: a number followed by a size unit suffix ("k", "m",
        "g" or "t"), e.g. ``512m`` or ``2g``.

        Spark configuration properties are described at
        https://spark.apache.org/docs/latest/configuration.html.

        :param kind: The kind of session to create.
        :param proxy_user: User to impersonate when starting the session.
        :param jars: URLs of jars to be used in this session.
        :param py_files: URLs of Python files to be used in this session.
        :param files: URLs of files to be used in this session.
        :param driver_memory: Amount of memory to use for the driver process
            (e.g. '512m').
        :param driver_cores: Number of cores to use for the driver process.
        :param executor_memory: Amount of memory to use per executor process
            (e.g. '512m').
        :param executor_cores: Number of cores to use for each executor.
        :param num_executors: Number of executors to launch for this session.
        :param archives: URLs of archives to be used in this session.
        :param queue: The name of the YARN queue to submit the session to.
        :param name: The name of this session.
        :param spark_conf: Spark configuration properties.
        :param logger: Logger forwarded to the client's ``post`` call.
        :raises ValueError: If ``kind`` is not one of the kinds accepted for
            this server version.
        """
        # Legacy servers accept a different set of session kinds.
        allowed_kinds = (
            VALID_LEGACY_SESSION_KINDS
            if self.legacy_server()
            else VALID_SESSION_KINDS
        )
        if kind not in allowed_kinds:
            raise ValueError(
                f"{kind} is not a valid session kind for a Livy server of "
                f"this version (should be one of {allowed_kinds})")

        # Request-body key -> caller-supplied value, in the order the keys
        # are added to the body.
        optional_fields: Dict[str, Any] = {
            "proxyUser": proxy_user,
            "jars": jars,
            "pyFiles": py_files,
            "files": files,
            "driverMemory": driver_memory,
            "driverCores": driver_cores,
            "executorMemory": executor_memory,
            "executorCores": executor_cores,
            "numExecutors": num_executors,
            "archives": archives,
            "queue": queue,
            "name": name,
            "conf": spark_conf,
        }

        body: Dict[str, Any] = {"kind": kind.value}
        # Arguments left at None are omitted from the request entirely.
        body.update(
            (key, value)
            for key, value in optional_fields.items()
            if value is not None
        )

        response = self._client.post("/sessions", data=body, logger=logger)
        return Session.from_json(response)
Ejemplo n.º 13
0
 def list_sessions(self) -> List[Session]:
     """Get ``/sessions`` and convert every returned entry to a Session.

     :return: The parsed items found under the response's ``'sessions'`` key.
     """
     listing = self._client.get('/sessions')
     session_entries = listing['sessions']
     return [Session.from_json(entry) for entry in session_entries]