def get_job_status(self, job_id):
        """
        Queries Livy's batches endpoint for the current status of a batch job

        Keyword arguments:
            job_id {str} -- Job identifier

        Returns:
             response {dict} -- Dictionary holding the "id", "state" and "appId"
                                values taken from the endpoint's JSON reply
                                (None for any key the reply does not contain).

        Raises:
            ObjectNotFoundError -- the endpoint answered with HTTP 404
            ServiceError -- the endpoint answered with any other non-ok status
        """
        status_url = "{}/batches/{}".format(self.url, job_id)

        http_client = RestUtil.request_with_retry()
        livy_reply = http_client.get(url=status_url, auth=self.auth)

        # Happy path first: project the reply down to the three reported keys.
        if livy_reply.ok:
            payload = livy_reply.json()
            return {key: payload.get(key) for key in ("id", "state", "appId")}

        if livy_reply.status_code == 404:
            raise ObjectNotFoundError(
                "Job with id {} not found.".format(job_id))

        raise ServiceError("Failed to get jobs state. " + livy_reply.text)
    def get_job_logs(self, job_id, size):
        """
        Retrieves the logs of a batch job from Livy's batches logs endpoint

        Keyword arguments:
            job_id {str} -- Job identifier
            size {int} -- Number of log lines to be returned; the "size" query
                          parameter is only sent when this is a positive number

        Returns:
             response -- Http method response

        Raises:
            ObjectNotFoundError -- the endpoint answered with HTTP 404
            ServiceError -- the endpoint answered with any other non-ok status
        """
        logs_url = "{}/batches/{}/logs".format(self.url, job_id)

        # None, zero and negative sizes leave the URL without a query string.
        wants_size_limit = size is not None and size > 0
        if wants_size_limit:
            logs_url += "?size={}".format(size)

        http_client = RestUtil.request_with_retry()
        logs_reply = http_client.get(url=logs_url, auth=self.auth)

        if logs_reply.ok:
            return logs_reply

        if logs_reply.status_code == 404:
            raise ObjectNotFoundError(
                "Job with id {} not found.".format(job_id))

        raise ServiceError("Failed to get job logs. " + logs_reply.text)
# Example no. 3
# 0
    def download_file(self, file_name_with_path):
        """
        Downloads a file from HDFS location identified by the path

        The path is first resolved through _get_actual_download_file_path. The
        file is then opened with an "?op=OPEN" request that does not follow
        redirects; the expected answer is a 307 whose "Location" header holds
        the URL the content is finally fetched from.

        Keyword arguments:
            file_name_with_path {str} -- Name of the file identified with a path

        Returns:
             response -- Default Flask response object with file content and appropriate headers set

        Raises:
            ObjectNotFoundError -- the open request answered with HTTP 404
            ServiceError -- the open request answered with anything other than
                            307, the redirect carried no Location header, or
                            the download request itself was not successful
        """
        file_name_with_path = self._get_actual_download_file_path(
            file_name_with_path)

        open_file_url = self.url + file_name_with_path + "?op=OPEN"

        redirect_response = RestUtil.request_with_retry().get(
            open_file_url, auth=self.auth, allow_redirects=False)
        if redirect_response.status_code != 307:
            if redirect_response.status_code == 404:
                raise ObjectNotFoundError(
                    "File {} not found.".format(file_name_with_path))
            raise ServiceError(
                "Attempt to open file {0} failed with {1} and {2}.".format(
                    file_name_with_path, redirect_response.status_code,
                    redirect_response.reason))

        # A redirect without a target cannot be followed; report it as a
        # service failure instead of a KeyError or a silent None return.
        redirect_headers = redirect_response.headers or {}
        file_download_url = redirect_headers.get("Location")
        if not file_download_url:
            raise ServiceError(
                "Attempt to open file {0} failed: redirect response has no "
                "Location header.".format(file_name_with_path))

        download_response = RestUtil.request_with_retry().get(
            file_download_url, auth=self.auth, stream=True)
        # Inspect the download response itself. The redirect response has
        # status 307 at this point, so checking it would never detect a
        # failed download.
        if not download_response.ok:
            raise ServiceError(
                "Attempt to download file {0} failed with {1} and {2}.".
                format(file_name_with_path, download_response.status_code,
                       download_response.reason))

        response = Response(download_response.content,
                            headers=dict(download_response.headers))
        response.headers['Content-Type'] = 'application/octet-stream'
        response.headers[
            'Content-Disposition'] = 'attachment;filename="{}"'.format(
                file_name_with_path.split("/")[-1])

        return response
    def _get_actual_download_file_path(self, file_name_with_path):
        """
        Resolves the given path to the path of the single file to download

        Issues an "?op=LISTSTATUS" request for the path and inspects the
        "FileStatuses" -> "FileStatus" list of the JSON reply:
          * an entry with an empty "pathSuffix" resolves to the path itself;
          * a single "FILE" entry resolves to path + "/" + pathSuffix;
          * a single "DIRECTORY" entry is resolved recursively.

        Keyword arguments:
            file_name_with_path {str} -- Name of the file identified with a path

        Returns:
             download_file_path {str} -- Resolved file path, or None when the
                                         reply lacks the expected keys or the
                                         entry type is neither FILE nor DIRECTORY

        Raises:
            ObjectNotFoundError -- the listing answered with HTTP 404, or the
                                   listing is empty (nothing to download)
            BadRequestError -- the listing contains more than one entry
            ServiceError -- the listing answered with any other non-ok status
        """
        list_status_url = self.url + file_name_with_path + "?op=LISTSTATUS"

        response = RestUtil.request_with_retry().get(list_status_url,
                                                     auth=self.auth)

        if not response.ok:
            if response.status_code == 404:
                raise ObjectNotFoundError(
                    "File {} not found.".format(file_name_with_path))
            raise ServiceError(
                "Attempt to open file {0} failed with {1} and {2}.".format(
                    file_name_with_path, response.status_code,
                    response.reason))

        list_status_response = json.loads(response.text)
        if list_status_response is None:
            return None

        files_statuses = list_status_response.get("FileStatuses")
        if files_statuses is None:
            return None

        file_status_list = files_statuses.get("FileStatus")
        if file_status_list is None:
            return None

        # An empty listing (e.g. an empty directory) has no first entry to
        # inspect; report it as not found rather than failing on the index.
        if not file_status_list:
            raise ObjectNotFoundError(
                "File {} not found.".format(file_name_with_path))

        if len(file_status_list) > 1:
            raise BadRequestError(
                "Specified path is a directory containing multiple files. Supported only if single part file is inside folder."
            )

        file_status = file_status_list[0]
        path_suffix = file_status["pathSuffix"]
        if len(path_suffix) == 0:
            # Empty suffix: the listed entry is the requested path itself.
            return file_name_with_path

        child_path = file_name_with_path + "/" + path_suffix
        if file_status["type"] == "DIRECTORY":
            return self._get_actual_download_file_path(child_path)
        if file_status["type"] == "FILE":
            return child_path

        # Any other entry type is left unresolved, as before.
        return None