def _response_hook(self, response, *args, **kwargs):
    """
    Log an HTTP request/response pair.

    The request that produced ``response`` is logged at DEBUG severity
    (level 10) and the reply itself at the more verbose level 5.  Each
    message is only assembled when the logger's effective level would
    actually emit it, so nothing is formatted (and ``response.text`` is not
    read) at the usual INFO/WARNING levels.

    Parameters
    ----------
    response : response object
        Must expose ``request`` (with ``method``, ``url``, ``headers`` and
        ``body``), ``status_code``, ``reason``, ``url``, ``headers`` and
        ``text``.
        NOTE(review): presumably a `requests.Response` delivered through a
        session "response" hook - confirm where this method is registered.
    *args
        Accepted for signature compatibility; ignored.
    **kwargs
        Only ``stream`` is consulted: when it is truthy the reply body is
        not read and the placeholder ``Streaming Data`` is logged instead.

    Returns
    -------
    None
    """
    loglevel = log.getEffectiveLevel()

    # A lower effective level means a more verbose logger, so a message of
    # severity N is emitted only when ``loglevel <= N``.
    if loglevel <= 10:
        # Log request at DEBUG severity
        request_hdrs = '\n'.join(
            f'{k}: {v}' for k, v in response.request.headers.items())
        request_log = textwrap.indent(
            f"-----------------------------------------\n"
            f"{response.request.method} {response.request.url}\n"
            f"{request_hdrs}\n"
            f"\n"
            f"{response.request.body}\n"
            f"-----------------------------------------", '\t')
        log.debug(f"HTTP request\n{request_log}")

    if loglevel <= 5:
        # Log response at super-DEBUG severity
        response_hdrs = '\n'.join(
            f'{k}: {v}' for k, v in response.headers.items())
        if kwargs.get('stream'):
            # The body is deliberately left untouched for streamed replies.
            body_section = "Streaming Data\n"
        else:
            body_section = f"\n{response.text}\n"
        response_log = textwrap.indent(
            f"-----------------------------------------\n"
            f"{response.status_code} {response.reason} {response.url}\n"
            f"{response_hdrs}\n"
            f"{body_section}"
            f"-----------------------------------------", '\t')
        log.log(5, f"HTTP response\n{response_log}")
def download_data(self, observation_id, *, filename=None, verbose=False,
                  cache=True, prop=False, credentials_file=None, **kwargs):
    """
    Download data from XMM-Newton

    Parameters
    ----------
    observation_id : string
        id of the observation to be downloaded, mandatory
        The identifier of the observation we want to retrieve, 10 digits
        example: 0144090201
    filename : string
        file name to be used to store the file
    verbose : bool
        optional, default 'False'
        flag to display information about the process
    cache : bool
        optional, default 'True'
        forwarded to the link request and to the file download
    prop : boolean
        optional, default 'False'
        flag to download proprietary data, the method will then ask the
        user to input their username and password either manually or using
        the credentials_file
    credentials_file : string
        optional, default None
        path to where the users config.ini file is stored with their
        username and password

    All remaining keyword arguments are forwarded to ``_create_link``:

    level : string
        level to download, optional, by default everything is downloaded
        values: ODF, PPS
    instname : string
        instrument name, optional, two characters, by default everything
        values: OM, R1, R2, M1, M2, PN
    instmode : string
        instrument mode, optional
        examples: Fast, FlatFieldLow, Image, PrimeFullWindow
    filter : string
        filter, optional
        examples: Closed, Open, Thick, UVM2, UVW1, UVW2, V
    expflag : string
        exposure flag, optional, by default everything
        values: S, U, X(not applicable)
    expno : integer
        exposure number with 3 digits, by default all exposures
        examples: 001, 003
    name : string
        product type, optional, 6 characters, by default all product types
        examples: 3COLIM, ATTTSR, EVENLI, SBSPEC, EXPMAP, SRCARF
    datasubsetno : character
        data subset number, optional, by default all
        examples: 0, 1
    sourceno : hex value
        source number, optional, by default all sources
        example: 00A, 021, 001
    extension : string
        file format, optional, by default all formats
        values: ASC, ASZ, FTZ, HTM, IND, PDF, PNG

    Returns
    -------
    None
        The observation is written to disk.  If ``verbose`` the request URL
        (without credentials) and the destination file name are logged at
        INFO level.
    """
    # create url to access the aio
    link = self._create_link(observation_id, **kwargs)
    # Credential-free form of the URL: the only one that may reach the log.
    loggable_link = link

    # If the user wants to access proprietary data, ask them for their
    # credentials
    if prop:
        username, password = self._get_username_and_password(credentials_file)
        link = f"{link}&AIOUSER={username}&AIOPWD={password}"

    if verbose:
        log.info(loggable_link)

    # get response of created url
    # NOTE(review): this helper also receives the credentialed URL; confirm
    # it does not log it.
    params = self._request_link(link, cache)
    r_filename = params["filename"]
    suffixes = Path(r_filename).suffixes

    # get desired filename
    filename = self._create_filename(filename, observation_id, suffixes)

    if prop:
        # Raise the log level above INFO (20) for the duration of the
        # download: a log.debug in query.py prints the URL being fetched,
        # which would reveal the user's username and password.
        previouslevel = log.getEffectiveLevel()
        log.setLevel(21)
        try:
            self._download_file(link, filename, head_safe=True, cache=cache)
        finally:
            # Restore the level even when the download fails, otherwise
            # the logger would stay silenced for the rest of the session.
            log.setLevel(previouslevel)
    else:
        self._download_file(link, filename, head_safe=True, cache=cache)

    if verbose:
        log.info(f"Wrote {loggable_link} to {filename}")
def _download_file(self, url, local_filepath, timeout=None, auth=None,
                   continuation=True, cache=False, method="GET",
                   head_safe=False, **kwargs):
    """
    Download a file.  Resembles `astropy.utils.data.download_file` but uses
    the local ``_session``

    Parameters
    ----------
    url : string
        Address to download.
    local_filepath : string
        Path of the file the data is written to.
    timeout : int
        Forwarded to every ``_session.request`` call.
    auth : dict or None
        Forwarded to every ``_session.request`` call.
    continuation : bool
        If the file has already been partially downloaded *and* the server
        supports HTTP "range" requests, the download will be continued
        where it left off.
    cache : bool
        If True and ``local_filepath`` already exists, the file is reused
        when its size equals the advertised ``content-length`` (or when no
        length is advertised) instead of being downloaded again.
    method : "GET" or "POST"
        HTTP verb used for the request that retrieves the data.
    head_safe : bool
        If True the initial probe is a ``HEAD`` request and the data is
        only requested with ``method`` once a download is known to be
        needed.
    **kwargs
        Forwarded to every ``_session.request`` call.

    Returns
    -------
    response or None
        The (already closed) response the data was read from, or None when
        an existing local file was reused and nothing was downloaded.

    Raises
    ------
    Whatever ``response.raise_for_status()`` raises for an error status.
    """

    def _open_stream(verb):
        # All requests issued here share these arguments; only the verb
        # differs.
        return self._session.request(verb, url, timeout=timeout,
                                     stream=True, auth=auth, **kwargs)

    response = _open_stream("HEAD" if head_safe else method)
    response.raise_for_status()
    # True while ``response`` is a HEAD reply, i.e. has no body to save.
    body_pending = head_safe

    if 'content-length' in response.headers:
        length = int(response.headers['content-length'])
        if length == 0:
            log.warning('URL {0} has length=0'.format(url))
    else:
        length = None

    file_exists = os.path.exists(local_filepath)

    if file_exists and 'Accept-Ranges' in response.headers and continuation:
        existing_file_length = os.stat(local_filepath).st_size
        if length is not None and existing_file_length >= length:
            # all done!
            log.info(
                "Found cached file {0} with expected size {1}.".format(
                    local_filepath, existing_file_length))
            response.close()
            return

        if existing_file_length == 0:
            # Nothing to resume from: plain download.
            open_mode = 'wb'
        else:
            if length is None:
                # Without a content-length the remaining amount cannot be
                # computed; ask for everything after what we already have.
                log.info("Continuing download of file {0} from byte {1} "
                         "(total size unknown)".format(
                             local_filepath, existing_file_length))
                end = ""
            else:
                remaining = length - existing_file_length
                log.info("Continuing download of file {0}, with {1} bytes to "
                         "go ({2}%)".format(
                             local_filepath, remaining,
                             remaining / length * 100))
                # bytes are indexed from 0:
                # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
                end = "{0}".format(length - 1)

            # The probe is superseded by the ranged request below.
            response.close()
            self._session.headers['Range'] = "bytes={0}-{1}".format(
                existing_file_length, end)
            try:
                response = _open_stream(method)
                response.raise_for_status()
            finally:
                # Never leave the Range header on the shared session, even
                # when the request fails.
                del self._session.headers['Range']
            body_pending = False

            # 206 means the server honoured the range and sent only the
            # missing tail; any other success status carries the complete
            # file, which must replace the partial one instead of being
            # appended to it.
            open_mode = 'ab' if response.status_code == 206 else 'wb'

    elif cache and file_exists:
        if length is None:
            log.info("Found cached file {0}.".format(local_filepath))
            response.close()
            return

        cached_size = os.stat(local_filepath).st_size
        if cached_size == length:
            log.info(
                "Found cached file {0} with expected size {1}.".format(
                    local_filepath, cached_size))
            response.close()
            return

        log.warning("Found cached file {0} with size {1} that is "
                    "different from expected size {2}".format(
                        local_filepath, cached_size, length))
        open_mode = 'wb'
    else:
        open_mode = 'wb'

    if body_pending:
        # Only a HEAD probe has been made so far; issue the real request
        # on every path that goes on to write the file.
        response.close()
        response = _open_stream(method)
        response.raise_for_status()

    blocksize = astropy.utils.data.conf.download_block_size

    log.debug(
        f"Downloading URL {url} to {local_filepath} with size {length} "
        f"by blocks of {blocksize}")

    bytes_read = 0

    # Only show progress bar if logging level is INFO or lower.
    if log.getEffectiveLevel() <= 20:
        progress_stream = None  # Astropy default
    else:
        progress_stream = io.StringIO()

    try:
        with ProgressBarOrSpinner(
                length,
                'Downloading URL {0} to {1} ...'.format(url, local_filepath),
                file=progress_stream) as pb:
            with open(local_filepath, open_mode) as f:
                for block in response.iter_content(blocksize):
                    f.write(block)
                    # Count what was really received; blocks may be
                    # shorter than ``blocksize``.
                    bytes_read += len(block)
                    if length is not None:
                        pb.update(min(bytes_read, length))
                    else:
                        pb.update(bytes_read)
    finally:
        # Release the connection whether or not the transfer succeeded.
        response.close()
    return response