def exists(self):
    """Check whether the remote file answers a HEAD request successfully.

    Returns:
        bool: ``True`` when the status code of the HEAD response equals
        ``requests.codes.ok``, ``False`` for any other non-redirect status.

    Raises:
        HTTPFileException: if ``self._matched_address`` is falsy (the
            address could not be parsed as an HTTP path), or if the server
            answers with a status code in the 300-308 range.
    """
    if not self._matched_address:
        raise HTTPFileException(
            "The file cannot be parsed as an HTTP path in form 'host:port/abs/path/to/file': %s"
            % self.local_file()
        )

    with self.httpr(verb="HEAD") as httpr:
        # A 3xx answer means the file was moved. The upper bound is inclusive
        # so that 308 (Permanent Redirect) is reported as a redirect like
        # 301/302/307 instead of being treated as "file does not exist".
        if httpr.status_code in range(300, 309):
            raise HTTPFileException(
                "The file specified appears to have been moved (HTTP %s), check the URL or try adding 'allow_redirects=True' to the remote() file object: %s"
                % (httpr.status_code, httpr.url)
            )
        return httpr.status_code == requests.codes.ok

    # Only reachable if the context manager swallowed an exception raised
    # inside the ``with`` body.
    return False
def download(self, make_dest_dirs=True):
    """Stream the remote file into ``self.local_path``.

    Args:
        make_dest_dirs: when true, create the parent directory of
            ``self.local_path`` (if it has one) before writing. The file
            itself is written regardless of this flag.

    Raises:
        HTTPFileException: if ``self.exists()`` reports that the file is
            not available remotely.
    """
    with self.httpr(stream=True) as httpr:
        if not self.exists():
            raise HTTPFileException(
                "The file does not seem to exist remotely: %s" % self.remote_file()
            )

        # The raw stream is copied to disk below, so a source file that is
        # itself gzip-compressed (``*.gz``) must stay untouched. More details:
        # https://stackoverflow.com/questions/25749345/how-to-download-gz-files-with-requests-in-python-without-decoding-it?noredirect=1&lq=1
        # If the file is not a ``.gz`` but the response carries
        # ``Content-Encoding: gzip``, the compression was only applied for
        # the HTTP transfer, so ask the raw stream to decode it.
        if (
            not self.name.endswith(".gz")
            and httpr.headers.get("Content-Encoding") == "gzip"
        ):
            httpr.raw.decode_content = True

        if make_dest_dirs:
            dest_dir = os.path.dirname(self.local_path)
            # os.makedirs("") raises FileNotFoundError, so skip directory
            # creation when local_path has no directory component.
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)

        with open(self.local_path, "wb") as f:
            shutil.copyfileobj(httpr.raw, f)

        # Ensure flush to disk. os.sync() is only available on Unix, so do
        # not fail a completed download on platforms that lack it.
        if hasattr(os, "sync"):
            os.sync()
def mtime(self):
    """Return the remote modification time taken from ``last-modified``.

    Returns:
        The epoch timestamp produced by ``email.utils.mktime_tz`` for the
        ``last-modified`` header of a HEAD response, or ``0`` when the
        header is absent or cannot be parsed by ``email.utils.parsedate_tz``.

    Raises:
        HTTPFileException: if ``self.exists()`` is false.
    """
    if not self.exists():
        raise HTTPFileException(
            "The file does not seem to exist remotely: %s" % self.remote_file()
        )

    with self.httpr(verb="HEAD") as response:
        last_modified = self.get_header_item(
            response, "last-modified", default=None
        )
        logger.debug("HTTP last-modified: {}".format(last_modified))

        # No header at all: fall back to the epoch start.
        if last_modified is None:
            return 0

        parsed = email.utils.parsedate_tz(last_modified)
        if parsed is None:
            # Header present but not a parseable date: log it and fall back.
            logger.debug(
                "HTTP last-modified not in RFC2822 format: `{}`".format(
                    last_modified
                )
            )
            return 0

        return email.utils.mktime_tz(parsed)
def exists(self):
    """Tell whether a HEAD request for the remote file succeeds.

    Returns:
        bool: whether the HEAD response status equals ``requests.codes.ok``.

    Raises:
        HTTPFileException: if ``self._matched_address`` is falsy, i.e. the
            address could not be parsed as an HTTP path.
    """
    if not self._matched_address:
        raise HTTPFileException(
            "The file cannot be parsed as an HTTP path in form 'host:port/abs/path/to/file': %s"
            % self.file()
        )

    with self.httpr(verb="HEAD") as response:
        is_ok = response.status_code == requests.codes.ok
        return is_ok

    # Fallback kept from the original control flow; only reachable if the
    # context manager swallowed an exception raised inside the ``with`` body.
    return False
def mtime(self):
    """Return the remote modification time as integer epoch seconds.

    The value is derived from the ``last-modified`` header of a HEAD
    response.

    Returns:
        int: the parsed timestamp, or ``0`` when the header is missing or
        cannot be parsed by ``email.utils.parsedate_tz``.

    Raises:
        HTTPFileException: if ``self.exists()`` is false.
    """
    if not self.exists():
        raise HTTPFileException(
            "The file does not seem to exist remotely: %s" % self.file()
        )

    with self.httpr(verb="HEAD") as httpr:
        file_mtime = self.get_header_item(httpr, "last-modified", default=0)

        # parsedate_tz() returns None for a missing or malformed value, and
        # mktime_tz(None) would raise TypeError; fall back to 0 instead.
        modified_tuple = (
            email.utils.parsedate_tz(file_mtime) if file_mtime else None
        )
        if modified_tuple is None:
            return 0

        return int(email.utils.mktime_tz(modified_tuple))
def download(self, make_dest_dirs=True):
    """Stream the remote file into ``self.local_path`` in 1 KiB chunks.

    Args:
        make_dest_dirs: when true, create the parent directory of
            ``self.local_path`` (if it has one) before writing. The file
            itself is written regardless of this flag.

    Raises:
        HTTPFileException: if ``self.exists()`` reports that the file is
            not available remotely.
    """
    with self.httpr(stream=True) as httpr:
        if not self.exists():
            raise HTTPFileException(
                "The file does not seem to exist remotely: %s" % self.file()
            )

        if make_dest_dirs:
            dest_dir = os.path.dirname(self.local_path)
            # os.makedirs("") raises FileNotFoundError, so skip directory
            # creation when local_path has no directory component.
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)

        with open(self.local_path, "wb") as f:
            for chunk in httpr.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alives
                    f.write(chunk)
def list(self):
    """Reject list-based operations, which this provider does not support.

    Raises:
        HTTPFileException: unconditionally.
    """
    message = "The HTTP Remote Provider does not currently support list-based operations like glob_wildcards()."
    raise HTTPFileException(message)
def upload(self):
    """Reject uploads, which are not permitted for this provider.

    Raises:
        HTTPFileException: unconditionally.
    """
    message = "Upload is not permitted for the HTTP remote provider. Is an output set to HTTP.remote()?"
    raise HTTPFileException(message)