def test_batch_from_json_no_optionals():
    """Check Batch.from_json on a payload carrying only "id" and "state".

    The missing optional fields are expected to come back as ``None`` for
    the application ID and info, and as an empty list for the log.
    """
    payload = {"id": 2398, "state": "starting"}

    wanted = Batch(
        batch_id=2398,
        app_id=None,
        app_info=None,
        log=[],
        state=SessionState.STARTING,
    )
    parsed = Batch.from_json(payload)

    assert parsed == wanted
Beispiel #2
0
    def get_batch(self, batch_id: int) -> Optional[Batch]:
        """Look up a single batch by its ID.

        :param batch_id: The ID of the batch.
        :return: The batch built from the response to ``/batches/{batch_id}``,
            or ``None`` when that request fails with an HTTP 404.
        :raises requests.HTTPError: For any HTTP error other than a 404.
        """
        try:
            payload = self._client.get(f"/batches/{batch_id}")
        except requests.HTTPError as error:
            # Only a 404 is translated into "no such batch"; every other
            # HTTP failure is passed on to the caller unchanged.
            if error.response.status_code != 404:
                raise
            return None
        return Batch.from_json(payload)
def test_batch_from_json():
    """Check Batch.from_json on a payload with every field populated."""
    payload = {
        "id": 2398,
        "appId": "application_000000000000_000001",
        "appInfo": {"key1": "val1", "key2": "val2"},
        "log": ["log1", "log2"],
        "state": "running",
    }

    wanted = Batch(
        batch_id=2398,
        app_id="application_000000000000_000001",
        app_info={"key1": "val1", "key2": "val2"},
        log=["log1", "log2"],
        state=SessionState.RUNNING,
    )
    parsed = Batch.from_json(payload)

    assert parsed == wanted
Beispiel #4
0
 def list_batches(self) -> List[Batch]:
     """List all the active batches in Livy.

     :return: One Batch for each entry under the "sessions" key of the
         response to ``/batches``, in the order the server returned them.
     """
     payload = self._client.get("/batches")
     batches = []
     for entry in payload["sessions"]:
         batches.append(Batch.from_json(entry))
     return batches
Beispiel #5
0
    def create_batch(
        self,
        file: str,
        class_name: str = None,
        args: List[str] = None,
        proxy_user: str = None,
        jars: List[str] = None,
        py_files: List[str] = None,
        files: List[str] = None,
        driver_memory: str = None,
        driver_cores: int = None,
        executor_memory: str = None,
        executor_cores: int = None,
        num_executors: int = None,
        archives: List[str] = None,
        queue: str = None,
        name: str = None,
        spark_conf: Dict[str, Any] = None,
    ) -> Batch:
        """Submit a new batch to Livy and return it.

        Resource locations
            ``py_files``, ``files``, ``jars`` and ``archives`` are lists of
            URLs (e.g. ["s3://bucket/object", "hdfs://path/to/file", ...])
            that the Spark driver process must be able to reach.  A URL
            without a scheme is taken to be relative to the default file
            system configured in the Livy server.  All of these are copied
            to the same working directory on the Spark cluster.

        Python imports
            URLs given in ``py_files`` are copied to a temporary staging
            area and placed on Python's sys.path ahead of the standard
            library paths, which makes .py, .zip and .egg files importable.

        Memory sizes
            ``driver_memory`` and ``executor_memory`` use the JVM memory
            string format: a number followed by a size unit suffix ("k",
            "m", "g" or "t"), e.g. 512m or 2g.

        See https://spark.apache.org/docs/latest/configuration.html for more
        information on Spark configuration properties.

        :param file: File containing the application to execute.
        :param class_name: Application Java/Spark main class.
        :param args: An array of strings to be passed to the Spark app.
        :param proxy_user: User to impersonate when starting the session.
        :param jars: URLs of jars to be used in this session.
        :param py_files: URLs of Python files to be used in this session.
        :param files: URLs of files to be used in this session.
        :param driver_memory: Amount of memory to use for the driver process
            (e.g. '512m').
        :param driver_cores: Number of cores to use for the driver process.
        :param executor_memory: Amount of memory to use per executor process
            (e.g. '512m').
        :param executor_cores: Number of cores to use for each executor.
        :param num_executors: Number of executors to launch for this session.
        :param archives: URLs of archives to be used in this session.
        :param queue: The name of the YARN queue to which submitted.
        :param name: The name of this session.
        :param spark_conf: Spark configuration properties.
        :return: The batch built from the server's response to the request.
        """

        # "file" is always sent; the other batch-specific fields are only
        # included when the caller actually supplied them.
        request_body: Dict[str, Any] = {"file": file}
        batch_only_fields = (("className", class_name), ("args", args))
        for key, value in batch_only_fields:
            if value is not None:
                request_body[key] = value

        # The remaining arguments are converted by _new_session_body; its
        # entries are merged in last, so they win on any key collision.
        request_body.update(
            _new_session_body(
                proxy_user,
                jars,
                py_files,
                files,
                driver_memory,
                driver_cores,
                executor_memory,
                executor_cores,
                num_executors,
                archives,
                queue,
                name,
                spark_conf,
            )
        )

        response = self._client.post("/batches", data=request_body)
        return Batch.from_json(response)