Beispiel #1
0
    def _response_hook(self, response, *args, **kwargs):
        """
        Log the HTTP exchange that produced ``response``.

        The request (method, URL, headers and body) is emitted at DEBUG
        severity (10) and the response (status, URL, headers and, unless the
        response is streamed, its text) at the more verbose level 5.  The
        log text is only assembled when the logger's effective level is low
        enough for the message to actually be emitted.

        Parameters
        ----------
        response : object
            Response whose ``request``, ``headers``, ``status_code``,
            ``reason``, ``url`` and ``text`` attributes are read
            (presumably a `requests.Response` -- confirm against the place
            where the hook is registered).
        *args
            Ignored.
        **kwargs
            Only ``stream`` is inspected; when truthy the response body is
            not read and a ``Streaming Data`` placeholder is logged instead.
        """
        loglevel = log.getEffectiveLevel()
        separator = "-----------------------------------------"

        # A message is emitted when its severity is >= the effective level,
        # so DEBUG (10) output requires an effective level of at most 10.
        if loglevel <= 10:
            # Log request at DEBUG severity
            request = response.request
            request_hdrs = '\n'.join(
                f'{k}: {v}' for k, v in request.headers.items())
            request_log = textwrap.indent(
                f"{separator}\n"
                f"{request.method} {request.url}\n"
                f"{request_hdrs}\n"
                f"\n"
                f"{request.body}\n"
                f"{separator}", '\t')
            log.debug(f"HTTP request\n{request_log}")

        if loglevel <= 5:
            # Log response at super-DEBUG severity
            response_hdrs = '\n'.join(
                f'{k}: {v}' for k, v in response.headers.items())
            if kwargs.get('stream'):
                # Reading ``response.text`` would consume the stream.
                payload = "Streaming Data\n"
            else:
                payload = f"\n{response.text}\n"
            response_log = textwrap.indent(
                f"{separator}\n"
                f"{response.status_code} {response.reason} {response.url}\n"
                f"{response_hdrs}\n"
                f"{payload}"
                f"{separator}", '\t')
            log.log(5, f"HTTP response\n{response_log}")
Beispiel #2
0
    def download_data(self, observation_id, *, filename=None, verbose=False,
                      cache=True, prop=False, credentials_file=None, **kwargs):
        """
        Download data from XMM-Newton

        Parameters
        ----------
        observation_id : string
            id of the observation to be downloaded, mandatory
            The identifier of the observation we want to retrieve, 10 digits
            example: 0144090201
        filename : string
            file name to be used to store the file
        verbose : bool
            optional, default 'False'
            flag to display information about the process
        cache : bool
            optional, default 'True'
            forwarded to the link request and to the file download
        prop: boolean
            optional, default 'False'
            flag to download proprietary data, the method will then ask the user to
            input their username and password either manually or using the credentials_file
        credentials_file: string
            optional, default None
            path to where the users config.ini file is stored with their username and password
        level : string
            level to download, optional, by default everything is downloaded
            values: ODF, PPS
        instname : string
            instrument name, optional, two characters, by default everything
            values: OM, R1, R2, M1, M2, PN
        instmode : string
            instrument mode, optional
            examples: Fast, FlatFieldLow, Image, PrimeFullWindow
        filter : string
            filter, optional
            examples: Closed, Open, Thick, UVM2, UVW1, UVW2, V
        expflag : string
            exposure flag, optional, by default everything
            values: S, U, X(not applicable)
        expno : integer
            exposure number with 3 digits, by default all exposures
            examples: 001, 003
        name : string
            product type, optional, 6 characters, by default all product types
            examples: 3COLIM, ATTTSR, EVENLI, SBSPEC, EXPMAP, SRCARF
        datasubsetno : character
            data subset number, optional, by default all
            examples: 0, 1
        sourceno : hex value
            source number, optional, by default all sources
            example: 00A, 021, 001
        extension : string
            file format, optional, by default all formats
            values: ASC, ASZ, FTZ, HTM, IND, PDF, PNG

        Returns
        -------
        None
            The observation is downloaded to disk.  When ``verbose`` is set,
            the request link (without credentials) and the name of the
            written file are logged.
        """

        # create url to access the aio
        link = self._create_link(observation_id, **kwargs)

        # Credential-free copy of the link: the only form that is ever logged.
        loggable_link = link

        # If the user wants to access proprietary data, ask them for their credentials
        if prop:
            username, password = self._get_username_and_password(credentials_file)
            link = f"{link}&AIOUSER={username}&AIOPWD={password}"

        if verbose:
            log.info(loggable_link)

        # get response of created url
        # NOTE(review): this request is made before the log level is raised
        # below; confirm that _request_link does not log the URL itself.
        params = self._request_link(link, cache)
        r_filename = params["filename"]
        suffixes = Path(r_filename).suffixes

        # get desired filename
        filename = self._create_filename(filename, observation_id, suffixes)

        if prop:
            # Raise the log level above INFO (20) for the duration of the
            # download: the download code logs the full URL, which at this
            # point contains the user's username and password.
            previouslevel = log.getEffectiveLevel()
            log.setLevel(21)
            try:
                self._download_file(link, filename, head_safe=True, cache=cache)
            finally:
                # Restore the level even if the download fails, otherwise the
                # logger would stay silenced for the rest of the session.
                log.setLevel(previouslevel)
        else:
            self._download_file(link, filename, head_safe=True, cache=cache)

        if verbose:
            log.info(f"Wrote {loggable_link} to {filename}")
Beispiel #3
0
    def _download_file(self,
                       url,
                       local_filepath,
                       timeout=None,
                       auth=None,
                       continuation=True,
                       cache=False,
                       method="GET",
                       head_safe=False,
                       **kwargs):
        """
        Download a file.  Resembles `astropy.utils.data.download_file` but uses
        the local ``_session``

        Parameters
        ----------
        url : string
            Address of the file to download.
        local_filepath : string
            Path the downloaded bytes are written to.
        timeout : int
            Forwarded to ``self._session.request``.
        auth : dict or None
            Forwarded to ``self._session.request``.
        continuation : bool
            If the file has already been partially downloaded *and* the server
            supports HTTP "range" requests, the download will be continued
            where it left off.
        cache : bool
            If True and ``local_filepath`` already exists, the existing file
            is kept when its size matches the advertised ``content-length``
            (or when no length is advertised).
        method : "GET" or "POST"
            HTTP method used to fetch the file body.
        head_safe : bool
            If True, the initial request that inspects the response headers
            is a ``HEAD`` request; the body is then fetched with ``method``
            only when a download is actually needed.
        **kwargs
            Forwarded to ``self._session.request``.

        Returns
        -------
        response or None
            The (closed) response the file was read from, or None when an
            existing local file made the download unnecessary.
        """

        def _send(http_method):
            # Every request in this method shares the same options.
            return self._session.request(http_method,
                                         url,
                                         timeout=timeout,
                                         stream=True,
                                         auth=auth,
                                         **kwargs)

        response = _send("HEAD" if head_safe else method)
        response.raise_for_status()

        # True while ``response`` is only the body-less HEAD probe, i.e. a
        # request with ``method`` is still required before data can be read.
        body_pending = head_safe

        if 'content-length' in response.headers:
            length = int(response.headers['content-length'])
            if length == 0:
                log.warning('URL {0} has length=0'.format(url))
        else:
            length = None

        file_exists = os.path.exists(local_filepath)

        if (file_exists and ('Accept-Ranges' in response.headers)
                and continuation):
            existing_file_length = os.stat(local_filepath).st_size
            if length is not None and existing_file_length >= length:
                # all done!
                log.info(
                    "Found cached file {0} with expected size {1}.".format(
                        local_filepath, existing_file_length))
                response.close()
                return
            elif existing_file_length == 0:
                # Nothing to resume from: download the whole file.
                open_mode = 'wb'
            else:
                open_mode = 'ab'
                if length is not None:
                    remaining = length - existing_file_length
                    log.info("Continuing download of file {0}, with {1} bytes to "
                             "go ({2}%)".format(
                                 local_filepath, remaining,
                                 remaining / length * 100))
                else:
                    # Without a content-length the remainder cannot be computed.
                    log.info("Continuing download of file {0}, with an unknown "
                             "number of bytes to go".format(local_filepath))

                # bytes are indexed from 0:
                # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
                end = "{0}".format(length - 1) if length is not None else ""

                # The probe response is replaced below; release it first.
                response.close()
                self._session.headers['Range'] = "bytes={0}-{1}".format(
                    existing_file_length, end)
                try:
                    response = _send(method)
                finally:
                    # Never leave the Range header on the shared session, even
                    # if the request fails.
                    del self._session.headers['Range']
                response.raise_for_status()
                body_pending = False

                if response.status_code != 206:
                    # The server ignored the Range header and sent the whole
                    # file; appending would duplicate data, so start over.
                    open_mode = 'wb'

        elif cache and file_exists:
            if length is None:
                log.info("Found cached file {0}.".format(local_filepath))
                response.close()
                return

            statinfo = os.stat(local_filepath)
            if statinfo.st_size == length:
                log.info(
                    "Found cached file {0} with expected size {1}.".format(
                        local_filepath, statinfo.st_size))
                response.close()
                return

            log.warning("Found cached file {0} with size {1} that is "
                        "different from expected size {2}".format(
                            local_filepath, statinfo.st_size, length))
            open_mode = 'wb'
        else:
            open_mode = 'wb'

        if body_pending:
            # Only headers have been fetched so far; request the actual data.
            response.close()
            response = _send(method)
            response.raise_for_status()

        blocksize = astropy.utils.data.conf.download_block_size

        log.debug(
            f"Downloading URL {url} to {local_filepath} with size {length} "
            f"by blocks of {blocksize}")

        bytes_read = 0

        # Only show progress bar if logging level is INFO or lower.
        if log.getEffectiveLevel() <= 20:
            progress_stream = None  # Astropy default
        else:
            progress_stream = io.StringIO()

        try:
            with ProgressBarOrSpinner(
                    length,
                    'Downloading URL {0} to {1} ...'.format(url, local_filepath),
                    file=progress_stream) as pb:
                with open(local_filepath, open_mode) as f:
                    for block in response.iter_content(blocksize):
                        f.write(block)
                        # The final block is usually shorter than blocksize.
                        bytes_read += len(block)
                        if length is not None:
                            # Clamp: the number of bytes read can exceed the
                            # advertised content-length.
                            pb.update(min(bytes_read, length))
                        else:
                            pb.update(bytes_read)
        finally:
            # Release the connection even if writing the file fails.
            response.close()

        return response