def get_remote_index_last_download_attempt_time(self, remote_repository):
    """Return the UTC datetime of the last *attempt* to download the index

    An attempt does not imply that a transfer took place; the download may
    have been skipped because of an unmodified ETag.

    Return None when no URL properties can be loaded for the index file
    (as in, 'pypm sync' or an equivalent was never run in the first place).
    """
    # The originally downloaded index file sits in the same directory as the
    # local index.
    index_dir = dirname(self.get_local_index_path(remote_repository))
    index_file = join(index_dir, remote_repository.REMOTE_INDEX_FILENAME)

    saved_props = URLGetter.URLProperties(index_file).load()
    if not saved_props:
        return None

    timestamp = saved_props.custom['last_attempt_utc']
    return BareDateTime.to_datetime(timestamp)
def get_remote_index_last_download_attempt_time(self, remote_repository):
    """Return when the remote index file was last *attempted* to download

    The value is a UTC datetime. Note that an attempt is recorded even if
    nothing was transferred (for instance, due to an unmodified ETag).

    None is returned if no URL properties are available for the index file,
    i.e. 'pypm sync' or an equivalent was never run in the first place.
    """
    local_index_path = self.get_local_index_path(remote_repository)
    original_index_file = P.join(
        P.dirname(local_index_path),
        remote_repository.REMOTE_INDEX_FILENAME)

    props = URLProperties(original_index_file).load()
    return (
        BareDateTime.to_datetime(props.custom['last_attempt_utc'])
        if props else None
    )
def get(self, target_file, start_info=None, use_cache=False,
        save_properties=False, auth=None):
    """Download the URL and save it to `target_file`

    Appropriate hook functions are automatically invoked

    - start_info: message to print (LOG.info) when transfer begins, cache
      is validated or an error occurs; it can also be a callable. A string
      is formatted with `status=...`; a callable is called with the status
      ('Get' or 'Hit') and its (truthy) return value is logged.

    - use_cache: if True, download will not actually happen if
      `target_file` is already available *and* its ETag is not changed in
      the server. For this to work, the previous call to `get` must have
      passed `save_properties=True` (in order to save the ETag).

    - save_properties: if True, a file named .$target_file.urlprops
      containing request headers and other metadata will be saved along
      side the target_file. (This is required for `use_cache` in future
      downloads)

    - auth: (username, password) -- optional http basic auth data

    Return True only if the download actually happened; return False when
    the server answered 304 (not modified) to a cache-validating request.

    Any other `six.moves.HTTPError` raised while opening the URL is
    re-raised to the caller.
    """
    def invoke_start_info(status):
        if not start_info:
            return
        if six.callable(start_info):
            i = start_info(status)
        else:
            i = start_info.format(status=status)
        if i:
            LOG.info(i)

    # Without a local copy there is nothing to validate against.
    if not P.exists(target_file):
        use_cache = False

    urlprops = URLProperties(target_file)
    props = urlprops.load()
    if props:
        # write back the new value of `last_attempt_utc` *now* so we don't
        # have to deal with it when an exception arises later.
        # last_attempt_utc is simply the time of the last download attempt
        props.custom['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(props.headers, props.custom)

    req = None
    if use_cache and props:
        # Enable the cache header `If-None-Match`
        etag = props.headers.get('ETag', props.headers.get('etag', None))
        if etag:
            req = six.moves.Request(self.url,
                                    headers={'If-None-Match': etag})
        else:
            LOG.warn('no ETag in last headers: %s', props.headers)
            req = six.moves.Request(self.url)
    else:
        req = six.moves.Request(self.url)

    if auth:
        username, password = auth
        req.add_header('Authorization',
                       _create_http_basicauth_header(username, password))

    # Set User-Agent
    # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
    # so we copy-paste that code here (from urllib2.OpenerDirector)
    urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
    pypm_user_agent = get_user_agent(urllib2_user_agent)
    assert pypm_user_agent
    req.add_header('User-Agent', pypm_user_agent)

    try:
        u = six.moves.urlopen(req)
    except six.moves.HTTPError as e:
        if e.code == 304 and use_cache:
            invoke_start_info('Hit')
            return False  # file not changed in server
        else:
            invoke_start_info('Get')
            raise
    else:
        invoke_start_info('Get')

    part_file = target_file + '.part'
    # Make sure the HTTP response is closed whether the transfer succeeds
    # or fails half-way through; otherwise the connection is leaked.
    try:
        bs = 1024 * 8  # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        # Download into a temporary `.part` file; it is moved over
        # `target_file` only after the whole body has been read.
        with open(part_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))

                # Hook 2: transferring
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)
                if not data:
                    break  # end of stream
                f.write(data)
                total_bytes_transferred += chunk_size
    finally:
        u.close()

    sh.mv(part_file, target_file)

    # Hook 3: completed
    self.hook_completed()

    # save the new headers and other properties
    if save_properties:
        # reuse existing props.custom (so other custom keys survive)
        custom_dict = props.custom if props else {}
        custom_dict['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(dict(headers), custom_dict)

    return True
def get(self, target_file, start_info=None, use_cache=False,
        save_properties=False, auth=None):
    """Download the URL and save it to `target_file`

    Appropriate hook functions are automatically invoked

    - start_info: message to print (LOG.info) when transfer begins, cache
      is validated or an error occurs; it can also be a callable. A string
      has `{status}` substituted via `str.format`; a callable receives the
      status string ('Get' or 'Hit') and whatever truthy value it returns
      is logged.

    - use_cache: if True, download will not actually happen if
      `target_file` is already available *and* its ETag is not changed in
      the server. For this to work, the previous call to `get` must have
      passed `save_properties=True` (in order to save the ETag).

    - save_properties: if True, a file named .$target_file.urlprops
      containing request headers and other metadata will be saved along
      side the target_file. (This is required for `use_cache` in future
      downloads)

    - auth: (username, password) -- optional http basic auth data

    Return True only if the download actually happened. False is returned
    when a cache-validating request is answered with HTTP 304; every other
    `six.moves.HTTPError` from opening the URL propagates to the caller.
    """
    def invoke_start_info(status):
        if not start_info:
            return
        if six.callable(start_info):
            i = start_info(status)
        else:
            i = start_info.format(status=status)
        if i:
            LOG.info(i)

    # Cache validation is pointless when the target does not exist yet.
    if not P.exists(target_file):
        use_cache = False

    urlprops = URLProperties(target_file)
    props = urlprops.load()
    if props:
        # write back the new value of `last_attempt_utc` *now* so we don't
        # have to deal with it when an exception arises later.
        # last_attempt_utc is simply the time of the last download attempt
        props.custom['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(props.headers, props.custom)

    req = None
    if use_cache and props:
        # Enable the cache header `If-None-Match`
        etag = props.headers.get('ETag', props.headers.get('etag', None))
        if etag:
            req = six.moves.Request(self.url,
                                    headers={'If-None-Match': etag})
        else:
            LOG.warn('no ETag in last headers: %s', props.headers)
            req = six.moves.Request(self.url)
    else:
        req = six.moves.Request(self.url)

    if auth:
        username, password = auth
        req.add_header('Authorization', _create_http_basicauth_header(
            username, password
        ))

    # Set User-Agent
    # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
    # so we copy-paste that code here (from urllib2.OpenerDirector)
    urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
    pypm_user_agent = get_user_agent(urllib2_user_agent)
    assert pypm_user_agent
    req.add_header('User-Agent', pypm_user_agent)

    try:
        u = six.moves.urlopen(req)
    except six.moves.HTTPError as e:
        if e.code == 304 and use_cache:
            invoke_start_info('Hit')
            return False  # file not changed in server
        else:
            invoke_start_info('Get')
            raise
    else:
        invoke_start_info('Get')

    partial_path = target_file + '.part'
    # The response object holds an open connection; release it in `finally`
    # so that neither a successful nor an aborted transfer leaks it.
    try:
        bs = 1024*8  # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        # Data is written to a `.part` file first and only moved onto
        # `target_file` once the stream has been fully consumed.
        with open(partial_path, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str((
                        total_bytes_transferred, total_size))

                # Hook 2: transferring
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)
                if not data:
                    break  # an empty read marks the end of the stream
                f.write(data)
                total_bytes_transferred += chunk_size
    finally:
        u.close()

    sh.mv(partial_path, target_file)

    # Hook 3: completed
    self.hook_completed()

    # save the new headers and other properties
    if save_properties:
        # reuse existing props.custom so previously stored keys are kept
        custom_dict = props.custom if props else {}
        custom_dict['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(dict(headers), custom_dict)

    return True
def get(self, target_file, info=None, use_cache=False,
        save_properties=False, auth=None):
    """Download the URL and save it to `target_file`

    Appropriate hook functions are automatically invoked

    - info: message to print (LOG.info) if transfer actually happens

    - use_cache: if True, an `If-None-Match` header carrying the
      previously saved ETag is sent, so that the server can skip the
      transfer when the file is unchanged. For this to work effectively,
      the previous ETag must be made available in the properties file that
      is created when `save_properties` flag is set to True in the *past*
      download call. If no ETag was saved, a warning is logged and a
      plain (unconditional) request is made.

    - save_properties: if True, a file named .$target_file.urlprops
      containing request headers and other metadata is saved along side the
      target_file. (This is required for `use_cache` in future downloads)

    - auth: (username, password) -- optional http basic auth data

    NOTE(review): a "not modified" (304) answer is not handled here;
    presumably it surfaces to the caller as an exception raised by
    `urllib2.urlopen` -- confirm against callers.
    """
    assert target_file

    urlprops = self.URLProperties(target_file)
    props = urlprops.load()
    if props:
        # write back the new value for last_attempt_utc *now* so we don't
        # have to deal with it when an exception arises later.
        props.custom['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(props.headers, props.custom)

    # Look the ETag up tolerantly: indexing `props.headers['etag']` directly
    # raised KeyError whenever the saved headers had no ETag (or spelled the
    # key as 'ETag').
    etag = None
    if use_cache and props:
        etag = props.headers.get('ETag', props.headers.get('etag', None))
        if not etag:
            LOG.warn('no ETag in last headers: %s', props.headers)

    if etag:
        req = urllib2.Request(self.url, headers={'If-None-Match': etag})
    else:
        req = urllib2.Request(self.url)

    if auth:
        username, password = auth
        req.add_header('Authorization',
                       _create_http_basicauth_header(username, password))

    # Set User-Agent
    # XXX: urllib2.py does not expose its default user-agent string; so
    # we copy-paste that code here (from urllib2.OpenerDirector)
    urllib2_user_agent = "Python-urllib/%s" % urllib2.__version__
    pypm_user_agent = get_user_agent(urllib2_user_agent)
    req.add_header('User-Agent', pypm_user_agent)

    u = urllib2.urlopen(req)
    # Close the response on every exit path so the connection is not leaked.
    try:
        bs = 1024 * 8  # from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        self.hook_initialize(total_size)
        if info:
            LOG.info(info)

        with open(target_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)
                # `not data` detects end-of-stream for both str and bytes;
                # comparing against '' never matches an empty bytes object.
                if not data:
                    break
                f.write(data)
                total_bytes_transferred += chunk_size
    finally:
        u.close()

    self.hook_completed()

    # save the new headers and other properties
    if save_properties:
        # reuse existing props.custom
        custom_dict = props.custom if props else {}
        custom_dict['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(dict(headers), custom_dict)
def get(self, target_file, info=None, use_cache=False,
        save_properties=False, auth=None):
    """Download the URL and save it to `target_file`

    Appropriate hook functions are automatically invoked

    - info: message to print (LOG.info) if transfer actually happens

    - use_cache: if True, an `If-None-Match` header carrying the
      previously saved ETag is sent, so that the server can skip the
      transfer when the file is unchanged. For this to work effectively,
      the previous ETag must be made available in the properties file that
      is created when `save_properties` flag is set to True in the *past*
      download call. When no ETag was saved, a warning is logged and an
      unconditional request is made instead.

    - save_properties: if True, a file named .$target_file.urlprops
      containing request headers and other metadata is saved along side the
      target_file. (This is required for `use_cache` in future downloads)

    - auth: (username, password) -- optional http basic auth data

    NOTE(review): errors from `six.moves.urlopen` (presumably including a
    304 "not modified" answer) are not caught here -- confirm that callers
    handle them.
    """
    assert target_file

    urlprops = self.URLProperties(target_file)
    props = urlprops.load()
    if props:
        # write back the new value for last_attempt_utc *now* so we don't
        # have to deal with it when an exception arises later.
        props.custom['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(props.headers, props.custom)

    req = None
    if use_cache and props:
        etag = props.headers.get('ETag', props.headers.get('etag', None))
        if etag:
            req = six.moves.Request(self.url,
                                    headers={'If-None-Match': etag})
        else:
            LOG.warn('no ETag in last headers: %s', props.headers)
            req = six.moves.Request(self.url)
    else:
        req = six.moves.Request(self.url)

    if auth:
        username, password = auth
        req.add_header('Authorization', _create_http_basicauth_header(
            username, password
        ))

    # Set User-Agent
    # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
    # so we copy-paste that code here (from urllib2.OpenerDirector)
    urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
    pypm_user_agent = get_user_agent(urllib2_user_agent)
    assert pypm_user_agent
    req.add_header('User-Agent', pypm_user_agent)

    u = six.moves.urlopen(req)
    # The response keeps a connection open; always close it, even when
    # a hook, a read or a write fails in the middle of the transfer.
    try:
        bs = 1024*8  # from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        self.hook_initialize(total_size)
        if info:
            LOG.info(info)

        with open(target_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str((
                        total_bytes_transferred, total_size))
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)
                if not data:
                    break  # empty read: end of stream
                f.write(data)
                total_bytes_transferred += chunk_size
    finally:
        u.close()

    self.hook_completed()

    # save the new headers and other properties
    if save_properties:
        # reuse existing props.custom
        custom_dict = props.custom if props else {}
        custom_dict['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(dict(headers), custom_dict)