def get(self, target_file, start_info=None, use_cache=False,
        save_properties=False, auth=None):
    """Download the URL and save it to `target_file`

    Appropriate hook functions are automatically invoked
    (`hook_initialize`, `hook_transferring`, `hook_completed`).

    - start_info: message to print (LOG.info) when transfer begins, cache
      is validated or an error occurs; it can also be a callable. A string
      is formatted with `status=...` and a callable is called with the
      status; the status is 'Hit' for a validated cache and 'Get'
      otherwise.

    - use_cache: if True, download will not actually happen if
      `target_file` is already available *and* its ETag is not changed in
      the server. For this to work, the previous call to `get` must have
      passed `save_properties=True` (in order to save the ETag).

    - save_properties: if True, a file named .$target_file.urlprops
      containing the headers returned for this download (`u.info()`) and
      other metadata will be saved along side the target_file. (This is
      required for `use_cache` in future downloads)

    - auth: (username, password) -- optional http basic auth data

    Return True only if the download actually happened. Return False when
    `use_cache` is in effect and the server answers 304; any other
    HTTPError is re-raised to the caller.
    """
    def invoke_start_info(status):
        if not start_info:
            return
        if six.callable(start_info):
            i = start_info(status)
        else:
            i = start_info.format(status=status)
        if i:
            LOG.info(i)

    if not P.exists(target_file):
        # Without a local copy there is nothing to revalidate.
        use_cache = False

    urlprops = URLProperties(target_file)
    props = urlprops.load()
    if props:
        # write back the new value of `last_attempt_utc` *now* so we don't
        # have to deal with it when an exception arises later.
        # last_attempt_utc is simply the time of the last download attempt
        props.custom['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(props.headers, props.custom)

    if use_cache and props:
        # Enable the cache header `If-None-Match`
        etag = props.headers.get('ETag', props.headers.get('etag', None))
        if etag:
            req = six.moves.Request(self.url,
                                    headers={'If-None-Match': etag})
        else:
            LOG.warn('no ETag in last headers: %s', props.headers)
            req = six.moves.Request(self.url)
    else:
        req = six.moves.Request(self.url)

    if auth:
        username, password = auth
        req.add_header('Authorization',
                       _create_http_basicauth_header(username, password))

    # Set User-Agent
    # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
    # so we copy-paste that code here (from urllib2.OpenerDirector)
    urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
    pypm_user_agent = get_user_agent(urllib2_user_agent)
    assert pypm_user_agent
    req.add_header('User-Agent', pypm_user_agent)

    try:
        u = six.moves.urlopen(req)
    except six.moves.HTTPError as e:
        if e.code == 304 and use_cache:
            invoke_start_info('Hit')
            return False  # file not changed in server
        invoke_start_info('Get')
        raise

    part_file = target_file + '.part'
    # Always release the response (and its connection), even when a hook,
    # the read loop or the file write raises.
    try:
        invoke_start_info('Get')

        bs = 1024 * 8  # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        # Download into a `.part` file so that `target_file` is only
        # replaced once the whole body has been received.
        with open(part_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))

                # Hook 2: transferring (reports the previous chunk; the
                # first call reports 0 bytes)
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)
                if chunk_size == 0:
                    break
                f.write(data)
                total_bytes_transferred += chunk_size
    finally:
        u.close()

    sh.mv(part_file, target_file)

    # Hook 3: completed
    self.hook_completed()

    # save the new headers and other properties
    if save_properties:
        # reuse existing props.custom
        custom_dict = props.custom if (props and props.custom) else {}
        custom_dict['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(dict(headers), custom_dict)

    return True
def get(self, target_file, start_info=None, use_cache=False,
        save_properties=False, auth=None):
    """Download the URL and save it to `target_file`

    Appropriate hook functions are automatically invoked
    (`hook_initialize`, `hook_transferring`, `hook_completed`).

    - start_info: message to print (LOG.info) when transfer begins, cache
      is validated or an error occurs; it can also be a callable. A string
      is formatted with `status=...` and a callable is called with the
      status; the status is 'Hit' for a validated cache and 'Get'
      otherwise.

    - use_cache: if True, download will not actually happen if
      `target_file` is already available *and* its ETag is not changed in
      the server. For this to work, the previous call to `get` must have
      passed `save_properties=True` (in order to save the ETag).

    - save_properties: if True, a file named .$target_file.urlprops
      containing the headers returned for this download (`u.info()`) and
      other metadata will be saved along side the target_file. (This is
      required for `use_cache` in future downloads)

    - auth: (username, password) -- optional http basic auth data

    Return True only if the download actually happened. Return False when
    `use_cache` is in effect and the server answers 304; any other
    HTTPError is re-raised to the caller.
    """
    def invoke_start_info(status):
        if not start_info:
            return
        if six.callable(start_info):
            i = start_info(status)
        else:
            i = start_info.format(status=status)
        if i:
            LOG.info(i)

    if not P.exists(target_file):
        # Without a local copy there is nothing to revalidate.
        use_cache = False

    urlprops = URLProperties(target_file)
    props = urlprops.load()
    if props:
        # write back the new value of `last_attempt_utc` *now* so we don't
        # have to deal with it when an exception arises later.
        # last_attempt_utc is simply the time of the last download attempt
        props.custom['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(props.headers, props.custom)

    if use_cache and props:
        # Enable the cache header `If-None-Match`
        etag = props.headers.get('ETag', props.headers.get('etag', None))
        if etag:
            req = six.moves.Request(self.url,
                                    headers={'If-None-Match': etag})
        else:
            LOG.warn('no ETag in last headers: %s', props.headers)
            req = six.moves.Request(self.url)
    else:
        req = six.moves.Request(self.url)

    if auth:
        username, password = auth
        req.add_header('Authorization',
                       _create_http_basicauth_header(username, password))

    # Set User-Agent
    # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
    # so we copy-paste that code here (from urllib2.OpenerDirector)
    urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
    pypm_user_agent = get_user_agent(urllib2_user_agent)
    assert pypm_user_agent
    req.add_header('User-Agent', pypm_user_agent)

    try:
        u = six.moves.urlopen(req)
    except six.moves.HTTPError as e:
        if e.code == 304 and use_cache:
            invoke_start_info('Hit')
            return False  # file not changed in server
        invoke_start_info('Get')
        raise

    part_file = target_file + '.part'
    # Always release the response (and its connection), even when a hook,
    # the read loop or the file write raises.
    try:
        invoke_start_info('Get')

        bs = 1024 * 8  # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        # Download into a `.part` file so that `target_file` is only
        # replaced once the whole body has been received.
        with open(part_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))

                # Hook 2: transferring (reports the previous chunk; the
                # first call reports 0 bytes)
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)
                if chunk_size == 0:
                    break
                f.write(data)
                total_bytes_transferred += chunk_size
    finally:
        u.close()

    sh.mv(part_file, target_file)

    # Hook 3: completed
    self.hook_completed()

    # save the new headers and other properties
    if save_properties:
        # reuse existing props.custom
        custom_dict = props.custom if (props and props.custom) else {}
        custom_dict['last_attempt_utc'] = BareDateTime.to_string(
            datetime.utcnow())
        urlprops.save(dict(headers), custom_dict)

    return True
def _rollover_log(logfile, maxsize=(2<<20)): """Move $logfile to $logfile.old if its size exceeds `maxsize`""" if exists(logfile): filesize = os.stat(logfile)[stat.ST_SIZE] if filesize >= maxsize: sh.mv(logfile, logfile+'.old')
def _rollover_log(logfile, maxsize=(2 << 20)): """Move $logfile to $logfile.old if its size exceeds `maxsize`""" if exists(logfile): filesize = os.stat(logfile)[stat.ST_SIZE] if filesize >= maxsize: sh.mv(logfile, logfile + '.old')