def get_user_data(timeout=None, url=None, num_retries=None, data=None):
    """
    Fetch the user data attached to this instance.

    Any argument left as ``None`` is filled in from the ``Boto`` section of
    the config before the request is delegated to ``get_userdata``.

    :type timeout: int
    :param timeout: timeout for the request; falls back to the
        ``http_socket_timeout`` option (default 70)

    :type url: string
    :param url: metadata service URL; falls back to the
        ``metadata_service_url`` option (default ``http://169.254.169.254``)

    :type num_retries: int
    :param num_retries: how many times to retry; falls back to the
        ``num_retries`` option (default 5)

    :type data: string
    :param data: user-defined user data for testing; when it is not
        ``None`` it is returned unchanged and no lookup is made

    :return: ``data`` when supplied, otherwise whatever ``get_userdata``
        returns
    """
    # Canned data short-circuits everything, including the config reads.
    if data is not None:
        return data

    timeout = (config.getint('Boto', 'http_socket_timeout', 70)
               if timeout is None else timeout)
    num_retries = (config.getint('Boto', 'num_retries', 5)
                   if num_retries is None else num_retries)
    url = (config.get('Boto', 'metadata_service_url',
                      'http://169.254.169.254')
           if url is None else url)
    return get_userdata(timeout=timeout, url=url, num_retries=num_retries)
def GetNewHttp(http_class=httplib2.Http, **kwargs):
    """Builds a new Http object configured from the boto config.

    Args:
      http_class: Optional custom Http class to use.
      **kwargs: Arguments to pass to the http_class constructor. The
          'ca_certs' and 'timeout' entries are always overwritten here.

    Returns:
      An initialized http_class instance (httplib2.Http by default).
    """
    # Collect the proxy options from the [Boto] section. Every option except
    # proxy_port is read with an explicit fallback; proxy_port relies on
    # whatever default config.getint applies on its own.
    proxy_settings = {}
    proxy_settings['proxy_host'] = config.get('Boto', 'proxy', None)
    proxy_settings['proxy_type'] = config.get('Boto', 'proxy_type', 'http')
    proxy_settings['proxy_port'] = config.getint('Boto', 'proxy_port')
    proxy_settings['proxy_user'] = config.get('Boto', 'proxy_user', None)
    proxy_settings['proxy_pass'] = config.get('Boto', 'proxy_pass', None)
    proxy_settings['proxy_rdns'] = config.get('Boto', 'proxy_rdns', None)

    # SetProxyInfo turns the boto settings into an httplib2 proxy-info object.
    proxy_info = SetProxyInfo(proxy_settings)

    # Some installers don't package a certs file with httplib2, so the one
    # shipped with gsutil is used; the timeout is finite so that network
    # flakiness cannot hang an SSL connection forever.
    kwargs.update(ca_certs=GetCertsFile(), timeout=SSL_TIMEOUT_SEC)

    http = http_class(proxy_info=proxy_info, **kwargs)
    validate_certificates = config.getbool('Boto',
                                           'https_validate_certificates')
    http.disable_ssl_certificate_validation = not validate_certificates
    return http
def _populate_keys_from_metadata_server(self):
    """Load temporary credentials from the instance metadata service.

    Reads ``metadata_service_timeout`` and ``metadata_service_num_attempts``
    from the ``Boto`` config section, queries
    ``meta-data/iam/security-credentials/`` and, when anything comes back,
    stores the access key, secret key, security token and expiry time on
    this object. Nothing is changed when the lookup returns no metadata.
    """
    # get_instance_metadata is imported here because of a circular
    # dependency.
    boto.log.debug("Retrieving credentials from metadata server.")
    from boto.utils import get_instance_metadata
    timeout = config.getfloat('Boto', 'metadata_service_timeout', 1.0)
    attempts = config.getint('Boto', 'metadata_service_num_attempts', 1)
    # The num_retries arg is actually the total number of attempts made,
    # so the config option is named *_num_attempts to make this more
    # clear to users.
    metadata = get_instance_metadata(
        timeout=timeout, num_retries=attempts,
        data='meta-data/iam/security-credentials/')
    if not metadata:
        return

    # I'm assuming there's only one role on the instance profile.
    # list() keeps the lookup working whether values() hands back a list
    # or a non-indexable view.
    security = list(metadata.values())[0]
    self._access_key = security['AccessKeyId']
    self._secret_key = self._convert_key_to_str(
        security['SecretAccessKey'])
    self._security_token = security['Token']
    expires_at = security['Expiration']
    self._credential_expiry_time = datetime.strptime(
        expires_at, "%Y-%m-%dT%H:%M:%SZ")
    # The expiry stamp ends in a literal "Z" (UTC), so the remaining
    # lifetime has to be measured against the UTC clock, not local time.
    boto.log.debug("Retrieved credentials will expire in %s at: %s",
                   self._credential_expiry_time - datetime.utcnow(),
                   expires_at)
def __init__(self, name, access_key=None, secret_key=None,
             security_token=None):
    """Initialise provider state, resolve credentials, apply overrides.

    :param name: provider name; used as the key into ``AclClassMap``,
        ``CannedAclsMap`` and ``HostKeyMap``
    :param access_key: access key; stored and passed to
        ``get_credentials``
    :param secret_key: secret key; stored and passed to
        ``get_credentials``
    :param security_token: security token; stored on the instance
    """
    self.host = self.port = self.host_header = None
    self.access_key = access_key
    self.secret_key = secret_key
    self.security_token = security_token
    self.name = name
    self.acl_class = self.AclClassMap[self.name]
    self.canned_acls = self.CannedAclsMap[self.name]
    self._credential_expiry_time = None
    self.get_credentials(access_key, secret_key)
    self.configure_headers()
    self.configure_errors()

    # Allow config file to override default host and port. Each entry is
    # (attribute to set, option-name template, config reader).
    key_prefix = self.HostKeyMap[self.name]
    overrides = (
        ('host', '%s_host', config.get),
        ('port', '%s_port', config.getint),
        ('host_header', '%s_host_header', config.get),
    )
    for attr_name, option_template, read_option in overrides:
        option_name = option_template % key_prefix
        if config.has_option('Credentials', option_name):
            setattr(self, attr_name, read_option('Credentials', option_name))
def __init__(self, name, access_key=None, secret_key=None,
             security_token=None, profile_name=None):
    """Initialise provider state, resolve credentials, apply overrides.

    :param name: provider name; used as the key into ``AclClassMap``,
        ``CannedAclsMap`` and ``HostKeyMap``
    :param access_key: access key; stored and passed to
        ``get_credentials``
    :param secret_key: secret key; stored and passed to
        ``get_credentials``
    :param security_token: security token; stored and passed to
        ``get_credentials``
    :param profile_name: profile name; stored and passed to
        ``get_credentials``
    """
    self.host = self.port = self.host_header = None
    self.access_key = access_key
    self.secret_key = secret_key
    self.security_token = security_token
    self.profile_name = profile_name
    self.name = name
    self.acl_class = self.AclClassMap[self.name]
    self.canned_acls = self.CannedAclsMap[self.name]
    self._credential_expiry_time = None
    self.get_credentials(access_key, secret_key, security_token,
                         profile_name)
    self.configure_headers()
    self.configure_errors()

    # Allow config file to override default host and port. Each entry is
    # (attribute to set, option-name template, config reader).
    key_prefix = self.HostKeyMap[self.name]
    overrides = (
        ('host', '%s_host', config.get),
        ('port', '%s_port', config.getint),
        ('host_header', '%s_host_header', config.get),
    )
    for attr_name, option_template, read_option in overrides:
        option_name = option_template % key_prefix
        if config.has_option('Credentials', option_name):
            setattr(self, attr_name, read_option('Credentials', option_name))
def GetJsonResumableChunkSize():
    """Returns the JSON resumable chunk size, in bytes.

    The value is read from the json_resumable_chunk_size option in the
    GSUtil config section (default 100 MiB). A configured value of 0 is
    replaced by 256 KiB; any other value is rounded up to the next multiple
    of 256 KiB.

    Returns:
      The chunk size, aligned to a multiple of 256 KiB.
    """
    # Plain ints are used instead of Python 2-only `L` literals; the
    # arithmetic is the same and the code parses on Python 3 as well.
    granularity = 1024 * 256
    chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
                               1024 * 1024 * 100)
    if chunk_size == 0:
        return granularity
    # `%` and `*` share precedence and bind left to right, so the modulus
    # must be taken against the whole 256 KiB value, not against 1024.
    remainder = chunk_size % granularity
    if remainder != 0:
        chunk_size += granularity - remainder
    return chunk_size
def _mexe(self, method, path, data, headers, host=None, sender=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
           transient Internet errors by simply trying again.
           Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param method: HTTP method; passed to ``connection.request`` or to
        ``sender``
    :param path: request path; replaced by the redirect target's path (plus
        query string) when a redirect is followed
    :param data: request body; passed through unchanged
    :param headers: request headers; passed through unchanged
    :param host: host used to obtain the connection; replaced by the
        redirect target's host when a redirect is followed
    :param sender: optional callable invoked as
        ``sender(connection, method, path, data, headers)`` in place of the
        default request / getresponse pair
    :return: the response object, as soon as a response is neither
        retried (500, 503, 408) nor followed as a redirect
    """
    boto.log.debug("Method: %s" % method)
    boto.log.debug("Path: %s" % path)
    boto.log.debug("Data: %s" % data)
    boto.log.debug("Headers: %s" % headers)
    boto.log.debug("Host: %s" % host)
    response = None
    body = None
    e = None
    # The config value takes precedence; self.num_retries is only the
    # fallback default.
    num_retries = config.getint("Boto", "num_retries", self.num_retries)
    i = 0
    connection = self.get_http_connection(host, self.is_secure)
    # i counts failed attempts; the bound is inclusive, so up to
    # num_retries + 1 attempts are made.
    while i <= num_retries:
        try:
            if callable(sender):
                response = sender(connection, method, path, data, headers)
            else:
                connection.request(method, path, data, headers)
                response = connection.getresponse()
            location = response.getheader("location")
            # -- gross hack --
            # httplib gets confused with chunked responses to HEAD requests
            # so I have to fake it out
            if method == "HEAD" and getattr(response, "chunked", False):
                response.chunked = 0
            if response.status == 500 or response.status == 503:
                # Server-side failure: read the body, then fall through to
                # the sleep / increment at the bottom of the loop.
                boto.log.debug("received %d response, retrying in %d seconds" % (response.status, 2 ** i))
                body = response.read()
            elif response.status == 408:
                # Request timeout: also retried, after dumping the path and
                # body to stdout.
                # NOTE(review): these prints look like leftover debugging
                # output - confirm whether they should go through boto.log.
                body = response.read()
                print "-------------------------"
                print " 4 0 8 "
                print "path=%s" % path
                print body
                print "-------------------------"
            elif response.status < 300 or response.status >= 400 or not location:
                # Anything that is not a 3xx with a Location header is
                # final: give the connection back and return the response.
                self.put_http_connection(host, self.is_secure, connection)
                return response
            else:
                # 3xx with a Location header: retarget host/path and try
                # again. `continue` skips the sleep and the increment of i,
                # so redirects do not count against num_retries.
                scheme, host, path, params, query, fragment = urlparse.urlparse(location)
                if query:
                    path += "?" + query
                boto.log.debug("Redirecting: %s" % scheme + "://" + host + path)
                connection = self.get_http_connection(host, scheme == "https")
                continue
        except KeyboardInterrupt:
            sys.exit("Keyboard Interrupt")
        except self.http_exceptions, e:
            # Connection-level failure: replace the connection before the
            # next attempt.
            boto.log.debug("encountered %s exception, reconnecting" % e.__class__.__name__)
            connection = self.new_http_connection(host, self.is_secure)
        # Exponential back-off between attempts: 1, 2, 4, ... seconds.
        time.sleep(2 ** i)
        i += 1
    # NOTE(review): once the retries are exhausted nothing is returned or
    # raised in the code visible here, so the call yields None, and `body`
    # and `e` are never used - confirm whether trailing error handling is
    # missing from this block.
def __init__(self, name, access_key=None, secret_key=None,
             security_token=None, profile_name=None):
    """Initialise provider state, load shared credentials, apply overrides.

    :param name: provider name; used as the key into ``AclClassMap``,
        ``CannedAclsMap`` and ``HostKeyMap``, and to build the
        ``~/.<name>/credentials`` path
    :param access_key: access key; stored and passed to
        ``get_credentials``
    :param secret_key: secret key; stored and passed to
        ``get_credentials``
    :param security_token: security token; stored and passed to
        ``get_credentials``
    :param profile_name: profile name; stored and passed to
        ``get_credentials``
    """
    self.host = self.port = self.host_header = None
    self.access_key = access_key
    self.secret_key = secret_key
    self.security_token = security_token
    self.profile_name = profile_name
    self.name = name
    self.acl_class = self.AclClassMap[self.name]
    self.canned_acls = self.CannedAclsMap[self.name]
    self._credential_expiry_time = None

    # Load shared credentials file if it exists; otherwise the Config
    # object is left exactly as constructed with do_load=False.
    credentials_file = os.path.join(expanduser('~'), '.' + name,
                                    'credentials')
    self.shared_credentials = Config(do_load=False)
    if os.path.isfile(credentials_file):
        self.shared_credentials.load_from_path(credentials_file)

    self.get_credentials(access_key, secret_key, security_token,
                         profile_name)
    self.configure_headers()
    self.configure_errors()

    # Allow config file to override default host and port. Each entry is
    # (attribute to set, option-name template, config reader).
    key_prefix = self.HostKeyMap[self.name]
    overrides = (
        ('host', '%s_host', config.get),
        ('port', '%s_port', config.getint),
        ('host_header', '%s_host_header', config.get),
    )
    for attr_name, option_template, read_option in overrides:
        option_name = option_template % key_prefix
        if config.has_option('Credentials', option_name):
            setattr(self, attr_name, read_option('Credentials', option_name))
def _populate_keys_from_metadata_server(self):
    """Load temporary credentials from the instance metadata service.

    Reads ``metadata_service_timeout`` and ``metadata_service_num_attempts``
    from the ``Boto`` config section, queries
    ``meta-data/iam/security-credentials/`` and, when anything comes back,
    stores the four values returned by ``_get_credentials_from_metadata``
    (access key, secret key, security token, expiry stamp) on this object.
    Nothing is changed when the lookup returns no metadata.
    """
    # get_instance_metadata is imported here because of a circular
    # dependency.
    boto.log.debug("Retrieving credentials from metadata server.")
    from boto.utils import get_instance_metadata
    timeout = config.getfloat('Boto', 'metadata_service_timeout', 1.0)
    attempts = config.getint('Boto', 'metadata_service_num_attempts', 1)
    # The num_retries arg is actually the total number of attempts made,
    # so the config option is named *_num_attempts to make this more
    # clear to users.
    metadata = get_instance_metadata(
        timeout=timeout, num_retries=attempts,
        data='meta-data/iam/security-credentials/')
    if not metadata:
        return

    # I'm assuming there's only one role on the instance profile.
    creds = self._get_credentials_from_metadata(metadata)
    self._access_key = creds[0]
    self._secret_key = creds[1]
    self._security_token = creds[2]
    expires_at = creds[3]
    self._credential_expiry_time = datetime.strptime(
        expires_at, "%Y-%m-%dT%H:%M:%SZ")
    # The expiry stamp ends in a literal "Z" (UTC), so the remaining
    # lifetime has to be measured against the UTC clock, not local time.
    boto.log.debug("Retrieved credentials will expire in %s at: %s",
                   self._credential_expiry_time - datetime.utcnow(),
                   expires_at)
def _populate_keys_from_metadata_server(self):
    """Load temporary credentials from the instance metadata service.

    Reads ``metadata_service_timeout`` and ``metadata_service_num_attempts``
    from the ``Boto`` config section, queries
    ``meta-data/iam/security-credentials/`` and, when anything comes back,
    stores the access key, secret key, security token and expiry time on
    this object. Nothing is changed when the lookup returns no metadata.
    """
    # get_instance_metadata is imported here because of a circular
    # dependency.
    boto.log.debug("Retrieving credentials from metadata server.")
    from boto.utils import get_instance_metadata

    timeout = config.getfloat("Boto", "metadata_service_timeout", 1.0)
    attempts = config.getint("Boto", "metadata_service_num_attempts", 1)
    # The num_retries arg is actually the total number of attempts made,
    # so the config option is named *_num_attempts to make this more
    # clear to users.
    metadata = get_instance_metadata(
        timeout=timeout, num_retries=attempts, data="meta-data/iam/security-credentials/"
    )
    if not metadata:
        return

    # I'm assuming there's only one role on the instance profile.
    # list() keeps the lookup working whether values() hands back a list
    # or a non-indexable view.
    security = list(metadata.values())[0]
    self._access_key = security["AccessKeyId"]
    self._secret_key = self._convert_key_to_str(security["SecretAccessKey"])
    self._security_token = security["Token"]
    expires_at = security["Expiration"]
    self._credential_expiry_time = datetime.strptime(expires_at, "%Y-%m-%dT%H:%M:%SZ")
    # The expiry stamp ends in a literal "Z" (UTC), so the remaining
    # lifetime has to be measured against the UTC clock, not local time.
    boto.log.debug(
        "Retrieved credentials will expire in %s at: %s",
        self._credential_expiry_time - datetime.utcnow(),
        expires_at,
    )
def GetJsonResumableChunkSize():
    """Returns the JSON resumable chunk size, in bytes.

    The value is read from the json_resumable_chunk_size option in the
    GSUtil config section (default 100 MiB). A configured value of 0 is
    replaced by 256 KiB; any other value is rounded up to the next multiple
    of 256 KiB.

    Returns:
      The chunk size, aligned to a multiple of 256 KiB.
    """
    alignment = long(1024 * 256)
    chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
                               long(1024 * 1024 * 100))
    if chunk_size == 0:
        return alignment
    overshoot = chunk_size % alignment
    if overshoot != 0:
        # Pad up to the next alignment boundary.
        chunk_size += (alignment - overshoot)
    return chunk_size
def _BatchSort(in_iter, out_file):
    """Sorts input lines from in_iter and outputs to out_file.

    Sorts in batches as input arrives, so input file does not need to be
    loaded into memory all at once. Derived from Python Recipe 466302:
    Sorting big files the Python 2.4 way by Nicolas Lehuen.

    Sorted format is per _BuildTmpOutputLine. We're sorting on the entire
    line when we could just sort on the first record (URL); but the sort
    order is identical either way.

    Each batch of up to rsync_buffer_lines lines is sorted and written to
    its own temporary file named "<out_file.name>-NNNNNN"; the temporary
    files are then merged into out_file and removed.

    Args:
      in_iter: Input iterator.
      out_file: Output file.

    Raises:
      CommandException: if an IOError with errno EMFILE (too many open
          files) occurs while building or merging the chunk files.
      IOError: any other IOError is re-raised unchanged.
    """
    # Note: If chunk_files gets very large we can run out of open FDs. See
    # .boto file comments about rsync_buffer_lines. If increasing
    # rsync_buffer_lines doesn't suffice (e.g., for someone synchronizing
    # with a really large bucket), an option would be to make gsutil merge
    # in passes, never opening all chunk files simultaneously.
    buffer_size = config.getint("GSUtil", "rsync_buffer_lines", 32000)
    chunk_files = []
    try:
        while True:
            current_chunk = sorted(islice(in_iter, buffer_size))
            if not current_chunk:
                break
            output_chunk = io.open(
                "%s-%06i" % (out_file.name, len(chunk_files)),
                mode="w+", encoding=UTF8)
            chunk_files.append(output_chunk)
            # The chunk is a single string, so write() emits it in one
            # call; writelines() would iterate it character by character.
            output_chunk.write(unicode("".join(current_chunk)))
            output_chunk.flush()
            # Rewind so the merge below reads the chunk from the start.
            output_chunk.seek(0)
        out_file.writelines(heapq.merge(*chunk_files))
    except IOError as e:
        if e.errno == errno.EMFILE:
            raise CommandException(
                "\n".join(
                    textwrap.wrap(
                        "Synchronization failed because too many open file handles were "
                        "needed while building synchronization state. Please see the "
                        "comments about rsync_buffer_lines in your .boto config file for a "
                        "possible way to address this problem."
                    )
                )
            )
        raise
    finally:
        # Cleanup stays best-effort, but the two steps are independent (a
        # failed close must not leave the temporary file behind) and
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        for chunk_file in chunk_files:
            try:
                chunk_file.close()
            except Exception:
                pass
            try:
                os.remove(chunk_file.name)
            except Exception:
                pass
def _BatchSort(in_iter, out_file):
    """Sorts input lines from in_iter and outputs to out_file.

    Sorts in batches as input arrives, so input file does not need to be
    loaded into memory all at once. Derived from Python Recipe 466302:
    Sorting big files the Python 2.4 way by Nicolas Lehuen.

    Sorted format is per _BuildTmpOutputLine. We're sorting on the entire
    line when we could just sort on the first record (URL); but the sort
    order is identical either way.

    Each batch of up to rsync_buffer_lines lines is sorted and written to
    its own temporary file named '<out_file.name>-NNNNNN'; the temporary
    files are then merged into out_file and removed.

    Args:
      in_iter: Input iterator.
      out_file: Output file.

    Raises:
      CommandException: if an IOError with errno EMFILE (too many open
          files) occurs while building or merging the chunk files.
      IOError: any other IOError is re-raised unchanged.
    """
    # Note: If chunk_files gets very large we can run out of open FDs. See
    # .boto file comments about rsync_buffer_lines. If increasing
    # rsync_buffer_lines doesn't suffice (e.g., for someone synchronizing
    # with a really large bucket), an option would be to make gsutil merge
    # in passes, never opening all chunk files simultaneously.
    buffer_size = config.getint('GSUtil', 'rsync_buffer_lines', 32000)
    chunk_files = []
    try:
        while True:
            current_chunk = sorted(islice(in_iter, buffer_size))
            if not current_chunk:
                break
            output_chunk = io.open(
                '%s-%06i' % (out_file.name, len(chunk_files)),
                mode='w+', encoding=UTF8)
            chunk_files.append(output_chunk)
            # The chunk is a single string, so write() emits it in one
            # call; writelines() would iterate it character by character.
            output_chunk.write(unicode(''.join(current_chunk)))
            output_chunk.flush()
            # Rewind so the merge below reads the chunk from the start.
            output_chunk.seek(0)
        out_file.writelines(heapq.merge(*chunk_files))
    except IOError as e:
        if e.errno == errno.EMFILE:
            raise CommandException('\n'.join(
                textwrap.wrap(
                    'Synchronization failed because too many open file handles were '
                    'needed while building synchronization state. Please see the '
                    'comments about rsync_buffer_lines in your .boto config file for a '
                    'possible way to address this problem.')))
        raise
    finally:
        # Cleanup stays best-effort, but the two steps are independent (a
        # failed close must not leave the temporary file behind) and
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        for chunk_file in chunk_files:
            try:
                chunk_file.close()
            except Exception:
                pass
            try:
                os.remove(chunk_file.name)
            except Exception:
                pass
def SetProxyInfo():
    """Builds an httplib2.ProxyInfo from the boto config or the environment.

    The [Boto] proxy options are tried first. When they do not give both a
    host and a port, the first non-empty variable among http_proxy,
    https_proxy and HTTPS_PROXY is used instead.

    Args:
      None.

    Returns:
      httplib2.ProxyInfo constructed from boto or environment variable
      string.
    """
    # Numeric proxy-type codes, defined to match the httplib2 library.
    type_codes = {'socks4': 1, 'socks5': 2, 'http': 3, 'https': 3}
    http_code = type_codes['http']

    # An unset or unrecognised proxy_type falls back to 'http' (3) for
    # backwards compatibility.
    configured_type = config.get('Boto', 'proxy_type', None)
    proxy_type = type_codes.get(configured_type) or http_code
    host = config.get('Boto', 'proxy', None)

    # proxy_rdns fails for socks4 and socks5, so it only defaults to True
    # for http proxies.
    rdns_default = proxy_type == http_code
    port = config.getint('Boto', 'proxy_port', 0)
    user = config.get('Boto', 'proxy_user', None)
    password = config.get('Boto', 'proxy_pass', None)
    rdns = config.getbool('Boto', 'proxy_rdns', rdns_default)
    proxy_info = httplib2.ProxyInfo(
        proxy_host=host,
        proxy_type=proxy_type,
        proxy_port=port,
        proxy_user=user,
        proxy_pass=password,
        proxy_rdns=rdns)

    # Force socks proxies not to use rdns, whatever the config says.
    if proxy_info.proxy_type != http_code:
        proxy_info.proxy_rdns = False

    if proxy_info.proxy_host and proxy_info.proxy_port:
        return proxy_info

    # Fall back to using the environment variable. Use only http proxies.
    for env_name in ('http_proxy', 'https_proxy', 'HTTPS_PROXY'):
        if os.environ.get(env_name):
            proxy_info = ProxyInfoFromEnvironmentVar(env_name)
            # Assume proxy_rdns is True if a proxy environment variable
            # exists.
            proxy_info.proxy_rdns = config.getbool('Boto', 'proxy_rdns', True)
            break
    return proxy_info
def GetNewHttp(http_class=httplib2.Http, **kwargs):
    """Creates and returns a new httplib2.Http instance.

    Proxy settings come from the [Boto] config section; when they do not
    give both a host and a port, the first non-empty variable among
    http_proxy, https_proxy and HTTPS_PROXY is used instead.

    Args:
      http_class: Optional custom Http class to use.
      **kwargs: Arguments to pass to http_class constructor. The 'ca_certs'
          and 'timeout' entries are always overwritten here.

    Returns:
      An initialized httplib2.Http instance.
    """
    proxy_host = config.get('Boto', 'proxy', None)
    # proxy_rdns is read with getbool: config.get returns the raw string,
    # so a configured "False" would otherwise be truthy and enable rdns.
    proxy_info = httplib2.ProxyInfo(
        proxy_type=3,
        proxy_host=proxy_host,
        proxy_port=config.getint('Boto', 'proxy_port', 0),
        proxy_user=config.get('Boto', 'proxy_user', None),
        proxy_pass=config.get('Boto', 'proxy_pass', None),
        proxy_rdns=config.getbool('Boto', 'proxy_rdns', bool(proxy_host)))

    if not (proxy_info.proxy_host and proxy_info.proxy_port):
        # Fall back to using the environment variable.
        for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
            if proxy_env_var in os.environ and os.environ[proxy_env_var]:
                proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
                # Assume proxy_rdns is True if a proxy environment variable
                # exists.
                proxy_info.proxy_rdns = config.getbool(
                    'Boto', 'proxy_rdns', True)
                break

    # Some installers don't package a certs file with httplib2, so use the
    # one included with gsutil.
    kwargs['ca_certs'] = GetCertsFile()
    # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
    kwargs['timeout'] = SSL_TIMEOUT_SEC
    http = http_class(proxy_info=proxy_info, **kwargs)
    http.disable_ssl_certificate_validation = (not config.getbool(
        'Boto', 'https_validate_certificates'))
    return http
def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, path='/', provider='aws'):
    """
    Set up security, proxy, retryable exceptions, endpoint, provider,
    connection pool and auth handler for this connection.

    :type host: str
    :param host: The host to make the connection to

    :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
        Amazon). Passed to the ``Provider`` constructor.

    :keyword str aws_secret_access_key: Your AWS Secret Access Key
        (provided by Amazon). Passed to the ``Provider`` constructor.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL. Overridden by
        the ``is_secure`` option of the ``Boto`` config section when set.

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch. The factory should have a
        similar interface to L{httplib.HTTPSConnection}.

    :param str proxy: Address/hostname for a proxy server

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: str
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: str
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: int
    :param port: The port to use to connect; when falsy the port is taken
        from ``PORTS_BY_SECURITY``

    :type debug: int
    :param debug: Debug level; when falsy the ``debug`` option of the
        ``Boto`` config section is used

    :type path: str
    :param path: Stored as ``self.path``

    :type provider: str
    :param provider: Provider name passed to the ``Provider`` constructor
    """
    self.num_retries = 5
    # A value in the config file wins over the is_secure argument.
    if config.has_option('Boto', 'is_secure'):
        is_secure = config.getboolean('Boto', 'is_secure')
    self.is_secure = is_secure
    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

    # Exceptions from httplib that we want to catch and retry, plus the
    # socket errno values we don't want to catch.
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    self.socket_exception_values = (errno.EINTR,)
    if https_connection_factory is None:
        self.https_connection_factory = None
    else:
        # Element 0 is the factory, element 1 its extra exceptions.
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]

    self.protocol = 'https' if is_secure else 'http'
    self.host = host
    self.path = path
    self.debug = debug or config.getint('Boto', 'debug', debug)
    self.port = port or PORTS_BY_SECURITY[is_secure]

    self.provider = Provider(provider,
                             aws_access_key_id,
                             aws_secret_access_key)
    # Allow config file to override default host.
    provider_host = self.provider.host
    if provider_host:
        self.host = provider_host

    # Cache up to 20 connections per host, up to 20 hosts.
    self._pool = ConnectionPool(20, 20)
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None
    self._auth_handler = auth.get_auth_handler(
        host, config, self.provider, self._required_auth_capability())
def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
             version_id=None, hash_algs=None):
    """
    Retrieves a file from a Key, retrying for as long as progress is made.

    Each attempt is made with ``_attempt_resumable_download``. After a
    retryable failure the current file size is compared with the size
    before the attempt; consecutive attempts without progress are counted
    and the method gives up once that count exceeds ``self.num_retries``.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: The Key object from which upload is to be downloaded

    :type fp: file
    :param fp: File pointer into which data should be downloaded

    :type headers: dict
    :param headers: headers to send when retrieving the files; a falsy
        value is replaced by an empty dict

    :type cb: function
    :param cb: (optional) a callback function that will be called to
        report progress on the download.  The callback should accept two
        integer parameters, the first representing the number of bytes
        that have been successfully transmitted from the storage service
        and the second representing the total number of bytes that need
        to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter this parameter determines the granularity of the
        callback by defining the maximum number of times the callback
        will be called during the file transfer.

    :type torrent: bool
    :param torrent: Flag for whether to get a torrent for the file

    :type version_id: string
    :param version_id: The version ID (optional)

    :type hash_algs: dictionary
    :param hash_algs: (optional) Dictionary of hash algorithms and
        corresponding hashing class that implements update() and
        digest().  Defaults to {'md5': hashlib/md5.md5}.

    Raises ResumableDownloadException if a problem occurs during
        the transfer.
    """
    debug = key.bucket.connection.debug
    if not headers:
        headers = {}

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 6.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 6)
    # Number of consecutive attempts that did not grow the file.
    progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
        had_file_bytes_before_attempt = get_cur_file_size(fp)
        try:
            self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                             torrent, version_id, hash_algs)
            # Download succeeded, so remove the tracker file (if have one).
            self._remove_tracker_file()
            # Previously, check_final_md5() was called here to validate
            # downloaded file's checksum, however, to be consistent with
            # non-resumable downloads, this call was removed. Checksum
            # validation of file contents should be done by the caller.
            if debug >= 1:
                print('Resumable download complete.')
            return
        except self.RETRYABLE_EXCEPTIONS as e:
            if debug >= 1:
                print('Caught exception (%s)' % e.__repr__())
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                # Broken pipe error causes httplib to immediately
                # close the socket (http://bugs.python.org/issue5542),
                # so we need to close and reopen the key before resuming
                # the download.
                # Only the GSKey call is given hash_algs.
                if isinstance(key, GSKey):
                    key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                                 override_num_retries=0, hash_algs=hash_algs)
                else:
                    key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                                 override_num_retries=0)
        except ResumableDownloadException as e:
            # The disposition decides whether the failure is re-raised
            # (ABORT_CUR_PROCESS, ABORT) or retried (anything else). ABORT
            # additionally removes the tracker file.
            if (e.disposition ==
                ResumableTransferDisposition.ABORT_CUR_PROCESS):
                if debug >= 1:
                    print('Caught non-retryable ResumableDownloadException '
                          '(%s)' % e.message)
                raise
            elif (e.disposition ==
                ResumableTransferDisposition.ABORT):
                if debug >= 1:
                    print('Caught non-retryable ResumableDownloadException '
                          '(%s); aborting and removing tracker file' %
                          e.message)
                self._remove_tracker_file()
                raise
            else:
                if debug >= 1:
                    print('Caught ResumableDownloadException (%s) - will '
                          'retry' % e.message)

        # At this point we had a re-tryable failure; see if made progress.
        if get_cur_file_size(fp) > had_file_bytes_before_attempt:
            progress_less_iterations = 0
        else:
            progress_less_iterations += 1

        if progress_less_iterations > self.num_retries:
            # Don't retry any longer in the current process.
            raise ResumableDownloadException(
                'Too many resumable download attempts failed without '
                'progress. You might try this download again later',
                ResumableTransferDisposition.ABORT_CUR_PROCESS)

        # Close the key, in case a previous download died partway
        # through and left data in the underlying key HTTP buffer.
        # Do this within a try/except block in case the connection is
        # closed (since key.close() attempts to do a final read, in which
        # case this read attempt would get an IncompleteRead exception,
        # which we can safely ignore).
        try:
            key.close()
        except httplib.IncompleteRead:
            pass

        # Exponential back-off keyed on the progress-less streak, so the
        # wait is 1 second right after an attempt that made progress.
        sleep_time_secs = 2**progress_less_iterations
        if debug >= 1:
            print('Got retryable failure (%d progress-less in a row).\n'
                  'Sleeping %d seconds before re-trying' %
                  (progress_less_iterations, sleep_time_secs))
        time.sleep(sleep_time_secs)
def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, path='/', provider=None):
    """
    Set up security, proxy, retryable exceptions, endpoint, credentials,
    signing HMACs and the connection pool for this connection.

    :type host: string
    :param host: The host to make the connection to

    :type aws_access_key_id: string
    :param aws_access_key_id: AWS Access Key ID (provided by Amazon).
        When falsy, the ``AWS_ACCESS_KEY_ID`` environment variable and
        then the ``aws_access_key_id`` option of the ``Credentials``
        config section are tried.

    :type aws_secret_access_key: string
    :param aws_secret_access_key: Secret Access Key (provided by Amazon).
        When falsy, the ``AWS_SECRET_ACCESS_KEY`` environment variable and
        then the ``aws_secret_access_key`` option of the ``Credentials``
        config section are tried.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL. Overridden by
        the ``is_secure`` option of the ``Boto`` config section when set.

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch. The factory should have a
        similar interface to L{httplib.HTTPSConnection}.

    :type proxy: string
    :param proxy: Passed to ``handle_proxy`` (presumably the proxy
        address/hostname - confirm against ``handle_proxy``)

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: string
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: string
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: integer
    :param port: The port to use to connect; when falsy the port is taken
        from ``PORTS_BY_SECURITY``

    :type debug: int
    :param debug: Debug level; when falsy the ``debug`` option of the
        ``Boto`` config section is used

    :type path: string
    :param path: Stored as ``self.path``

    :type provider: string
    :param provider: ``"google"`` or ``"amazon"``. Selects the ``gs_*`` or
        ``aws_*`` options of the ``Credentials`` config section, which
        then replace the passed-in keys and the host. Any other value is
        ignored.
    """
    self.num_retries = 5
    # Override passed-in is_secure setting if value was defined in config.
    if config.has_option('Boto', 'is_secure'):
        is_secure = config.getboolean('Boto', 'is_secure')
    self.is_secure = is_secure
    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
    # define exceptions from httplib that we want to catch and retry
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # define values in socket exceptions we don't want to catch
    self.socket_exception_values = (errno.EINTR,)
    if https_connection_factory is not None:
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]
    else:
        self.https_connection_factory = None
    if (is_secure):
        self.protocol = 'https'
    else:
        self.protocol = 'http'
    self.host = host
    self.path = path
    if debug:
        self.debug = debug
    else:
        self.debug = config.getint('Boto', 'debug', debug)
    if port:
        self.port = port
    else:
        self.port = PORTS_BY_SECURITY[is_secure]

    # If credentials have been loaded with provider-dependent ids and
    # secret keys, use them. Both options of a pair must be present for
    # either to be used.
    if provider:
        if provider == "google":
            if (config.has_option('Credentials', 'gs_access_key_id') and
                    config.has_option('Credentials', 'gs_secret_access_key')):
                aws_access_key_id = config.get(
                    'Credentials', 'gs_access_key_id')
                aws_secret_access_key = config.get(
                    'Credentials', 'gs_secret_access_key')
            # allow config file to override default host
            if (config.has_option('Credentials', 'gs_host')):
                self.host = config.get('Credentials', 'gs_host')
        elif provider == "amazon":
            if (config.has_option('Credentials', 'aws_access_key_id') and
                    config.has_option('Credentials', 'aws_secret_access_key')):
                aws_access_key_id = config.get('Credentials',
                                               'aws_access_key_id')
                aws_secret_access_key = config.get('Credentials',
                                                   'aws_secret_access_key')
            # allow config file to override default host
            if (config.has_option('Credentials', 'aws_host')):
                self.host = config.get('Credentials', 'aws_host')

    # Resolution order for each key: argument, environment, config file.
    # `in` replaces the deprecated dict.has_key(), which Python 3 removed.
    if aws_access_key_id:
        self.aws_access_key_id = aws_access_key_id
    elif 'AWS_ACCESS_KEY_ID' in os.environ:
        self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
    elif config.has_option('Credentials', 'aws_access_key_id'):
        self.aws_access_key_id = config.get('Credentials',
                                            'aws_access_key_id')

    if aws_secret_access_key:
        self.aws_secret_access_key = aws_secret_access_key
    elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
        self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
    elif config.has_option('Credentials', 'aws_secret_access_key'):
        self.aws_secret_access_key = config.get('Credentials',
                                                'aws_secret_access_key')

    # NOTE(review): when no secret key is found anywhere, nothing above
    # assigns self.aws_secret_access_key, so the next line fails with
    # AttributeError unless the class defines a default - confirm.
    # initialize an HMAC for signatures, make copies with each request
    self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
    if sha256:
        self.hmac_256 = hmac.new(self.aws_secret_access_key,
                                 digestmod=sha256)
    else:
        self.hmac_256 = None

    # cache up to 20 connections per host, up to 20 hosts
    self._pool = ConnectionPool(20, 20)
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None
def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, path='/'):
    """
    Set up security, proxy, retryable exceptions, endpoint, credentials,
    signing HMACs and the connection pool for this connection.

    :type host: string
    :param host: The host to make the connection to

    :type aws_access_key_id: string
    :param aws_access_key_id: AWS Access Key ID (provided by Amazon).
        When falsy, the ``AWS_ACCESS_KEY_ID`` environment variable and
        then the ``aws_access_key_id`` option of the ``Credentials``
        config section are tried.

    :type aws_secret_access_key: string
    :param aws_secret_access_key: Secret Access Key (provided by Amazon).
        When falsy, the ``AWS_SECRET_ACCESS_KEY`` environment variable and
        then the ``aws_secret_access_key`` option of the ``Credentials``
        config section are tried.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch. The factory should have a
        similar interface to L{httplib.HTTPSConnection}.

    :type proxy: string
    :param proxy: Passed to ``handle_proxy`` (presumably the proxy
        address/hostname - confirm against ``handle_proxy``)

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: string
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: string
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: integer
    :param port: The port to use to connect; when falsy the port is taken
        from ``PORTS_BY_SECURITY``

    :type debug: int
    :param debug: Debug level; when falsy the ``debug`` option of the
        ``Boto`` config section is used

    :type path: string
    :param path: Stored as ``self.path``
    """
    self.num_retries = 5
    self.is_secure = is_secure
    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
    # define exceptions from httplib that we want to catch and retry
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # define values in socket exceptions we don't want to catch
    self.socket_exception_values = (errno.EINTR, )
    if https_connection_factory is not None:
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]
    else:
        self.https_connection_factory = None
    if (is_secure):
        self.protocol = 'https'
    else:
        self.protocol = 'http'
    self.host = host
    self.path = path
    if debug:
        self.debug = debug
    else:
        self.debug = config.getint('Boto', 'debug', debug)
    if port:
        self.port = port
    else:
        self.port = PORTS_BY_SECURITY[is_secure]

    # Resolution order for each key: argument, environment, config file.
    # `in` replaces the deprecated dict.has_key(), which Python 3 removed.
    if aws_access_key_id:
        self.aws_access_key_id = aws_access_key_id
    elif 'AWS_ACCESS_KEY_ID' in os.environ:
        self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
    elif config.has_option('Credentials', 'aws_access_key_id'):
        self.aws_access_key_id = config.get('Credentials',
                                            'aws_access_key_id')

    if aws_secret_access_key:
        self.aws_secret_access_key = aws_secret_access_key
    elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
        self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
    elif config.has_option('Credentials', 'aws_secret_access_key'):
        self.aws_secret_access_key = config.get('Credentials',
                                                'aws_secret_access_key')

    # NOTE(review): when no secret key is found anywhere, nothing above
    # assigns self.aws_secret_access_key, so the next line fails with
    # AttributeError unless the class defines a default - confirm.
    # initialize an HMAC for signatures, make copies with each request
    self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
    if sha256:
        self.hmac_256 = hmac.new(self.aws_secret_access_key,
                                 digestmod=sha256)
    else:
        self.hmac_256 = None

    # cache up to 20 connections per host, up to 20 hosts
    self._pool = ConnectionPool(20, 20)
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None
def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, path="/", provider="aws"):
    """
    Record connection settings, build the credential provider and prepare
    the HMAC objects used for signing.

    :type host: string
    :param host: The host to make the connection to

    :type aws_access_key_id: string
    :param aws_access_key_id: AWS Access Key ID (provided by Amazon)

    :type aws_secret_access_key: string
    :param aws_secret_access_key: Secret Access Key (provided by Amazon)

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch. The factory should have
        a similar interface to L{httplib.HTTPSConnection}.

    :type proxy:
    :param proxy: Passed through to ``handle_proxy``.

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: string
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: string
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: integer
    :param port: The port to use to connect

    :type provider: string
    :param provider: Name handed to ``Provider`` together with the two
        credential arguments.

    Raises BotoClientError when ``self.secret_key`` is None after the
    provider has been created.
    """
    self.num_retries = 5

    # A value in the config file takes precedence over the is_secure
    # argument.
    if config.has_option("Boto", "is_secure"):
        is_secure = config.getboolean("Boto", "is_secure")
    self.is_secure = is_secure

    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

    # Exceptions (from httplib and socket) that are caught and retried.
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # errno values carried by socket exceptions that must not be caught.
    self.socket_exception_values = (errno.EINTR,)

    if https_connection_factory is None:
        self.https_connection_factory = None
    else:
        # The pair is (factory, extra exceptions to treat as retryable).
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]

    self.protocol = "https" if is_secure else "http"
    self.host = host
    self.path = path
    # An explicit non-zero debug level wins over the config file.
    self.debug = debug if debug else config.getint("Boto", "debug", debug)
    self.port = port if port else PORTS_BY_SECURITY[is_secure]

    self.provider = Provider(provider, aws_access_key_id,
                             aws_secret_access_key)

    # A host supplied by the provider replaces the host argument.
    if self.provider.host:
        self.host = self.provider.host

    if self.secret_key is None:
        raise BotoClientError("No credentials have been supplied")

    # Template HMAC objects for signatures; copies are made per request.
    signing_key = self.secret_key
    self.hmac = hmac.new(signing_key, digestmod=sha)
    self.hmac_256 = hmac.new(signing_key, digestmod=sha256) if sha256 else None

    # cache up to 20 connections per host, up to 20 hosts
    self._pool = ConnectionPool(20, 20)
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None
def GetNumRetries():
    """Return the 'num_retries' option of the 'Boto' config section.

    23 is passed to ``config.getint`` as the default value.
    """
    default_num_retries = 23
    return config.getint('Boto', 'num_retries', default_num_retries)
def send_file(self, key, fp, headers, cb=None, num_cb=10): """ Upload a file to a key into a bucket on GS, using GS resumable upload protocol. :type key: :class:`boto.s3.key.Key` or subclass :param key: The Key object to which data is to be uploaded :type fp: file-like object :param fp: The file pointer to upload :type headers: dict :param headers: The headers to pass along with the PUT request :type cb: function :param cb: a callback function that will be called to report progress on the upload. The callback should accept two integer parameters, the first representing the number of bytes that have been successfully transmitted to GS, and the second representing the total number of bytes that need to be transmitted. :type num_cb: int :param num_cb: (optional) If a callback is specified with the cb parameter, this parameter determines the granularity of the callback by defining the maximum number of times the callback will be called during the file transfer. Providing a negative integer will cause your callback to be called with each buffer read. Raises ResumableUploadException if a problem occurs during the transfer. """ if not headers: headers = {} fp.seek(0, os.SEEK_END) file_length = fp.tell() fp.seek(0) debug = key.bucket.connection.debug # Use num-retries from constructor if one was provided; else check # for a value specified in the boto config file; else default to 5. if self.num_retries is None: self.num_retries = config.getint("Boto", "num_retries", 5) progress_less_iterations = 0 while True: # Retry as long as we're making progress. server_had_bytes_before_attempt = self.server_has_bytes try: etag = self._attempt_resumable_upload(key, fp, file_length, headers, cb, num_cb) # Upload succceded, so remove the tracker file (if have one). self._remove_tracker_file() self._check_final_md5(key, etag) if debug >= 1: print "Resumable upload complete." 
return except self.RETRYABLE_EXCEPTIONS, e: if debug >= 1: print ("Caught exception (%s)" % e.__repr__()) except ResumableUploadException, e: if e.disposition == ResumableTransferDisposition.ABORT: if debug >= 1: print ("Caught non-retryable ResumableUploadException " "(%s)" % e.message) raise else: if debug >= 1: print ("Caught ResumableUploadException (%s) - will " "retry" % e.message)
def SendFile(self, key, fp, size, headers, canned_acl=None, cb=None,
             num_cb=10):
    """Upload a file to a key into a bucket on GS, resumable upload protocol.

    On success the generation and metageneration returned by the upload
    attempt are stored on ``self`` and the generation is also copied to
    ``key.generation``.

    Args:
      key: `boto.s3.key.Key` or subclass representing the upload destination.
      fp: File pointer to upload
      size: Size of the file to upload.
      headers: The headers to pass along with the PUT request. A
          'Content-Type' entry whose value is None is removed, and
          'User-Agent' (plus the ACL header when canned_acl is given) is
          set on the dict that is passed in.
      canned_acl: Optional canned ACL to apply to object.
      cb: Callback function that will be called to report progress on
          the upload. The callback should accept two integer parameters,
          the first representing the number of bytes that have been
          successfully transmitted to GS, and the second representing the
          total number of bytes that need to be transmitted.
      num_cb: (optional) If a callback is specified with the cb parameter, this
          parameter determines the granularity of the callback by defining
          the maximum number of times the callback will be called during the
          file transfer. Providing a negative integer will cause your
          callback to be called with each buffer read.

    Raises:
      ResumableUploadException if a problem occurs during the transfer.
    """
    if not headers:
        headers = {}
    # If Content-Type header is present and set to None, remove it.
    # This is gsutil's way of asking boto to refrain from auto-generating
    # that header.
    content_type = 'Content-Type'
    if content_type in headers and headers[content_type] is None:
        del headers[content_type]

    if canned_acl:
        headers[key.provider.acl_header] = canned_acl

    headers['User-Agent'] = UserAgent

    file_length = size
    debug = key.bucket.connection.debug

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 6.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 6)
    self.progress_less_iterations = 0

    # NOTE(review): no statement in this loop updates
    # self.progress_less_iterations or reads
    # service_had_bytes_before_attempt; presumably
    # HandleResumableUploadException or code outside this excerpt bounds
    # the retries -- confirm.
    while True:  # Retry as long as we're making progress.
        service_had_bytes_before_attempt = self.service_has_bytes
        try:
            # Save generation and metageneration in class state so caller
            # can find these values, for use in preconditions of future
            # operations on the uploaded object.
            (_, self.generation, self.metageneration) = (
                self._AttemptResumableUpload(key, fp, file_length,
                                             headers, cb, num_cb))

            key.generation = self.generation
            if debug >= 1:
                self.logger.debug('Resumable upload complete.')
            return
        except self.RETRYABLE_EXCEPTIONS, e:
            if debug >= 1:
                self.logger.debug('Caught exception (%s)', e.__repr__())
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                # Broken pipe error causes httplib to immediately
                # close the socket (http://bugs.python.org/issue5542),
                # so we need to close the connection before we resume
                # the upload (which will cause a new connection to be
                # opened the next time an HTTP request is sent).
                key.bucket.connection.connection.close()
        except ResumableUploadException, e:
            # Disposition handling is delegated to the helper.
            self.HandleResumableUploadException(e, debug)
def GetNumRetries():
    """Return the 'num_retries' option of the 'Boto' config section.

    6 is passed to ``config.getint`` as the default value.
    """
    default_num_retries = 6
    return config.getint('Boto', 'num_retries', default_num_retries)
def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False, version_id=None): """ Retrieves a file from a Key :type key: :class:`boto.s3.key.Key` or subclass :param key: The Key object from which upload is to be downloaded :type fp: file :param fp: File pointer into which data should be downloaded :type headers: string :param: headers to send when retrieving the files :type cb: function :param cb: (optional) a callback function that will be called to report progress on the download. The callback should accept two integer parameters, the first representing the number of bytes that have been successfully transmitted from the storage service and the second representing the total number of bytes that need to be transmitted. :type num_cb: int :param num_cb: (optional) If a callback is specified with the cb parameter this parameter determines the granularity of the callback by defining the maximum number of times the callback will be called during the file transfer. :type torrent: bool :param torrent: Flag for whether to get a torrent for the file :type version_id: string :param version_id: The version ID (optional) Raises ResumableDownloadException if a problem occurs during the transfer. """ debug = key.bucket.connection.debug if not headers: headers = {} # Use num-retries from constructor if one was provided; else check # for a value specified in the boto config file; else default to 5. if self.num_retries is None: self.num_retries = config.getint('Boto', 'num_retries', 5) progress_less_iterations = 0 while True: # Retry as long as we're making progress. had_file_bytes_before_attempt = get_cur_file_size(fp) try: self._attempt_resumable_download(key, fp, headers, cb, num_cb, torrent, version_id) # Download succceded, so remove the tracker file (if have one). self._remove_tracker_file() self._check_final_md5(key, fp.name) if debug >= 1: print 'Resumable download complete.' 
return except self.RETRYABLE_EXCEPTIONS, e: if debug >= 1: print('Caught exception (%s)' % e.__repr__()) except ResumableDownloadException, e: if (e.disposition == ResumableTransferDisposition.ABORT_CUR_PROCESS): if debug >= 1: print( 'Caught non-retryable ResumableDownloadException ' '(%s)' % e.message) raise elif (e.disposition == ResumableTransferDisposition.ABORT): if debug >= 1: print( 'Caught non-retryable ResumableDownloadException ' '(%s); aborting and removing tracker file' % e.message) self._remove_tracker_file() raise else: if debug >= 1: print( 'Caught ResumableDownloadException (%s) - will ' 'retry' % e.message)
def __init__(self, server, aws_access_key_id=None,
             aws_secret_access_key=None, is_secure=True, port=None,
             proxy=None, proxy_port=None, proxy_user=None, proxy_pass=None,
             debug=0, https_connection_factory=None):
    """
    Record connection settings, resolve credentials, prepare the HMAC
    objects used for signing and open the initial HTTP connection.

    @type server: string
    @param server: The server to make the connection to

    @type aws_access_key_id: string
    @param aws_access_key_id: AWS Access Key ID (provided by Amazon)

    @type aws_secret_access_key: string
    @param aws_secret_access_key: Secret Access Key (provided by Amazon)

    @type is_secure: boolean
    @param is_secure: Whether the connection is over SSL

    @type https_connection_factory: list or tuple
    @param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch. The factory should have
        a similar interface to L{httplib.HTTPSConnection}.

    @type proxy:
    @param proxy: Passed through to C{handle_proxy}.

    @type proxy_port: int
    @param proxy_port: The port to use when connecting over a proxy

    @type proxy_user: string
    @param proxy_user: The username to connect with on the proxy

    @type proxy_pass: string
    @param proxy_pass: The password to use when connection over a proxy.

    @type port: integer
    @param port: The port to use to connect
    """
    self.num_retries = 5
    self.is_secure = is_secure
    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
    # define exceptions from httplib that we want to catch and retry
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # define values in socket exceptions we don't want to catch
    self.socket_exception_values = (errno.EINTR, )
    if https_connection_factory is not None:
        # The pair is (factory, extra exceptions to treat as retryable).
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]
    else:
        self.https_connection_factory = None
    if is_secure:
        self.protocol = 'https'
    else:
        self.protocol = 'http'
    self.server = server
    # An explicit non-zero debug level wins over the config file.
    if debug:
        self.debug = debug
    else:
        self.debug = config.getint('Boto', 'debug', debug)
    if port:
        self.port = port
    else:
        self.port = PORTS_BY_SECURITY[is_secure]

    if self.port == 80:
        self.server_name = server
    elif sys.version_info[:2] >= (2, 6) and self.port == 443:
        # This unfortunate little hack can be attributed to
        # a difference in the 2.6 version of httplib.  In old
        # versions, it would append ":443" to the hostname sent
        # in the Host header and so we needed to make sure we
        # did the same when calculating the signature.  In 2.6
        # it no longer does that.  Hence, this kludge.
        #
        # The check is made against sys.version_info so that it also
        # holds for releases after 2.6; comparing the version string
        # with "2.6" matched that one release only.
        self.server_name = server
    else:
        self.server_name = '%s:%d' % (server, self.port)

    # Each credential is taken from the first source that supplies it:
    # explicit argument, then environment variable, then the
    # 'Credentials' section of the config.  If no source matches, the
    # attribute is left unassigned and the HMAC setup below fails with
    # AttributeError.
    if aws_access_key_id:
        self.aws_access_key_id = aws_access_key_id
    elif 'AWS_ACCESS_KEY_ID' in os.environ:
        self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
    elif config.has_option('Credentials', 'aws_access_key_id'):
        self.aws_access_key_id = config.get('Credentials',
                                            'aws_access_key_id')

    if aws_secret_access_key:
        self.aws_secret_access_key = aws_secret_access_key
    elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
        self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
    elif config.has_option('Credentials', 'aws_secret_access_key'):
        self.aws_secret_access_key = config.get('Credentials',
                                                'aws_secret_access_key')

    # initialize an HMAC for signatures, make copies with each request
    self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
    if sha256:
        self.hmac_256 = hmac.new(self.aws_secret_access_key,
                                 digestmod=sha256)
    else:
        self.hmac_256 = None

    # cache up to 20 connections
    self._cache = boto.utils.LRUCache(20)
    self.refresh_http_connection(self.server, self.is_secure)
    self._last_rs = None
def send_file(self, key, fp, headers, cb=None, num_cb=10): """ Upload a file to a key into a bucket on GS, using GS resumable upload protocol. :type key: :class:`boto.s3.key.Key` or subclass :param key: The Key object to which data is to be uploaded :type fp: file-like object :param fp: The file pointer to upload :type headers: dict :param headers: The headers to pass along with the PUT request :type cb: function :param cb: a callback function that will be called to report progress on the upload. The callback should accept two integer parameters, the first representing the number of bytes that have been successfully transmitted to GS, and the second representing the total number of bytes that need to be transmitted. :type num_cb: int :param num_cb: (optional) If a callback is specified with the cb parameter, this parameter determines the granularity of the callback by defining the maximum number of times the callback will be called during the file transfer. Providing a negative integer will cause your callback to be called with each buffer read. Raises ResumableUploadException if a problem occurs during the transfer. """ if not headers: headers = {} fp.seek(0, os.SEEK_END) file_length = fp.tell() fp.seek(0) debug = key.bucket.connection.debug # Use num-retries from constructor if one was provided; else check # for a value specified in the boto config file; else default to 5. if self.num_retries is None: self.num_retries = config.getint('Boto', 'num_retries', 5) progress_less_iterations = 0 while True: # Retry as long as we're making progress. server_had_bytes_before_attempt = self.server_has_bytes try: etag = self._attempt_resumable_upload(key, fp, file_length, headers, cb, num_cb) # Upload succceded, so remove the tracker file (if have one). self._remove_tracker_file() self._check_final_md5(key, etag) if debug >= 1: print 'Resumable upload complete.' 
return except self.RETRYABLE_EXCEPTIONS, e: if debug >= 1: print('Caught exception (%s)' % e.__repr__()) except ResumableUploadException, e: if (e.disposition == ResumableTransferDisposition.ABORT_CUR_PROCESS): if debug >= 1: print('Caught non-retryable ResumableUploadException ' '(%s); aborting but retaining tracker file' % e.message) raise elif (e.disposition == ResumableTransferDisposition.ABORT): if debug >= 1: print('Caught non-retryable ResumableUploadException ' '(%s); aborting and removing tracker file' % e.message) self._remove_tracker_file() raise else: if debug >= 1: print('Caught ResumableUploadException (%s) - will ' 'retry' % e.message)
def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
             version_id=None, hash_algs=None):
    """
    Retrieves a file from a Key using a resumable download.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: The Key object from which upload is to be downloaded

    :type fp: file
    :param fp: File pointer into which data should be downloaded

    :type headers: dict
    :param headers: headers to send when retrieving the files

    :type cb: function
    :param cb: (optional) a callback function that will be called to
        report progress on the download. The callback should accept two
        integer parameters, the first representing the number of bytes
        that have been successfully transmitted from the storage service
        and the second representing the total number of bytes that need
        to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter this parameter determines the granularity of the
        callback by defining the maximum number of times the callback
        will be called during the file transfer.

    :type torrent: bool
    :param torrent: Flag for whether to get a torrent for the file

    :type version_id: string
    :param version_id: The version ID (optional)

    :type hash_algs: dictionary
    :param hash_algs: (optional) Dictionary of hash algorithms and
        corresponding hashing class that implements update() and digest().
        Defaults to {'md5': hashlib/md5.md5}.

    Raises ResumableDownloadException if a problem occurs during
    the transfer.
    """
    debug = key.bucket.connection.debug
    if not headers:
        headers = {}

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 5.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 5)
    progress_less_iterations = 0

    # NOTE(review): within the code visible here the two locals tracking
    # progress are assigned but never consulted, so a retryable failure
    # re-enters the loop unconditionally -- confirm against the full file.
    while True:  # Retry as long as we're making progress.
        had_file_bytes_before_attempt = get_cur_file_size(fp)
        try:
            self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                             torrent, version_id, hash_algs)
            # Download succeeded, so remove the tracker file (if have one).
            self._remove_tracker_file()
            # Previously, check_final_md5() was called here to validate
            # downloaded file's checksum, however, to be consistent with
            # non-resumable downloads, this call was removed. Checksum
            # validation of file contents should be done by the caller.
            if debug >= 1:
                print 'Resumable download complete.'
            return
        except self.RETRYABLE_EXCEPTIONS, e:
            if debug >= 1:
                print('Caught exception (%s)' % e.__repr__())
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                # Broken pipe error causes httplib to immediately
                # close the socket (http://bugs.python.org/issue5542),
                # so we need to close and reopen the key before resuming
                # the download.
                key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                             override_num_retries=0, hash_algs=hash_algs)
        except ResumableDownloadException, e:
            if (e.disposition ==
                ResumableTransferDisposition.ABORT_CUR_PROCESS):
                # Give up in this process; the tracker file is not touched.
                if debug >= 1:
                    print('Caught non-retryable ResumableDownloadException '
                          '(%s)' % e.message)
                raise
            elif (e.disposition ==
                  ResumableTransferDisposition.ABORT):
                # Give up and discard the tracker file as well.
                if debug >= 1:
                    print('Caught non-retryable ResumableDownloadException '
                          '(%s); aborting and removing tracker file' %
                          e.message)
                self._remove_tracker_file()
                raise
            else:
                # Any other disposition: go around the loop again.
                if debug >= 1:
                    print('Caught ResumableDownloadException (%s) - will '
                          'retry' % e.message)
def _mexe(self, request, sender=None, override_num_retries=None,
          retry_handler=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
           transient Internet errors by simply trying again.
           Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param request: Object providing method, path, body, headers, host
        and an authorize() method; host and path are overwritten when a
        redirect is followed.
    :param sender: Optional callable invoked as
        sender(connection, method, path, body, headers) in place of the
        default request/getresponse pair; its return value is used as
        the response.
    :param override_num_retries: When not None, used instead of the
        'num_retries' config value (whose default is self.num_retries).
    :param retry_handler: Optional callable invoked as
        retry_handler(response, i, next_sleep). A truthy result must
        unpack to (msg, i, next_sleep) and triggers a sleep and retry.
    :return: The response, once its status is below 300, at least 400
        (other than 500/503), or it carries no location header. When the
        attempts are used up the loop just ends; the code shown here
        neither returns a value nor raises at that point.
    """
    boto.log.debug('Method: %s' % request.method)
    boto.log.debug('Path: %s' % request.path)
    boto.log.debug('Data: %s' % request.body)
    boto.log.debug('Headers: %s' % request.headers)
    boto.log.debug('Host: %s' % request.host)
    response = None
    body = None
    e = None
    if override_num_retries is None:
        num_retries = config.getint('Boto', 'num_retries', self.num_retries)
    else:
        num_retries = override_num_retries
    i = 0
    connection = self.get_http_connection(request.host, self.is_secure)
    while i <= num_retries:
        # Use binary exponential backoff to desynchronize client requests
        next_sleep = random.random() * (2**i)
        try:
            # we now re-sign each request before it is retried
            boto.log.debug('Token: %s' % self.provider.security_token)
            request.authorize(connection=self)
            if callable(sender):
                response = sender(connection, request.method, request.path,
                                  request.body, request.headers)
            else:
                connection.request(request.method, request.path,
                                   request.body, request.headers)
                response = connection.getresponse()
            location = response.getheader('location')
            # -- gross hack --
            # httplib gets confused with chunked responses to HEAD requests
            # so I have to fake it out
            if request.method == 'HEAD' and getattr(
                    response, 'chunked', False):
                response.chunked = 0
            if callable(retry_handler):
                # The handler may replace both the attempt counter and the
                # sleep interval; this path skips the increment at the
                # bottom of the loop.
                status = retry_handler(response, i, next_sleep)
                if status:
                    msg, i, next_sleep = status
                    if msg:
                        boto.log.debug(msg)
                    time.sleep(next_sleep)
                    continue
            if response.status == 500 or response.status == 503:
                # Server error: read the body, then fall through to the
                # sleep and increment at the bottom of the loop.
                msg = 'Received %d response. ' % response.status
                msg += 'Retrying in %3.1f seconds' % next_sleep
                boto.log.debug(msg)
                body = response.read()
            elif response.status < 300 or response.status >= 400 or \
                    not location:
                # Final answer: hand the connection back and return.
                self.put_http_connection(request.host, self.is_secure,
                                         connection)
                return response
            else:
                # 3xx with a location header: retarget the request and
                # try again without sleeping or counting an attempt.
                scheme, request.host, request.path, \
                    params, query, fragment = urlparse.urlparse(location)
                if query:
                    request.path += '?' + query
                msg = 'Redirecting: %s' % scheme + '://'
                msg += request.host + request.path
                boto.log.debug(msg)
                connection = self.get_http_connection(
                    request.host, scheme == 'https')
                continue
        except self.http_exceptions, e:
            for unretryable in self.http_unretryable_exceptions:
                if isinstance(e, unretryable):
                    boto.log.debug(
                        'encountered unretryable %s exception, re-raising' %
                        e.__class__.__name__)
                    raise e
            boto.log.debug('encountered %s exception, reconnecting' % \
                           e.__class__.__name__)
            # Replace the connection before the next attempt.
            connection = self.new_http_connection(request.host,
                                                  self.is_secure)
        time.sleep(next_sleep)
        i += 1
def _mexe(self, method, path, data, headers, host=None, sender=None, override_num_retries=None): """ mexe - Multi-execute inside a loop, retrying multiple times to handle transient Internet errors by simply trying again. Also handles redirects. This code was inspired by the S3Utils classes posted to the boto-users Google group by Larry Bates. Thanks! """ boto.log.debug('Method: %s' % method) boto.log.debug('Path: %s' % path) boto.log.debug('Data: %s' % data) boto.log.debug('Headers: %s' % headers) boto.log.debug('Host: %s' % host) response = None body = None e = None if override_num_retries is None: num_retries = config.getint('Boto', 'num_retries', self.num_retries) else: num_retries = override_num_retries i = 0 connection = self.get_http_connection(host, self.is_secure) while i <= num_retries: try: if callable(sender): response = sender(connection, method, path, data, headers) else: connection.request(method, path, data, headers) response = connection.getresponse() location = response.getheader('location') # -- gross hack -- # httplib gets confused with chunked responses to HEAD requests # so I have to fake it out if method == 'HEAD' and getattr(response, 'chunked', False): response.chunked = 0 if response.status == 500 or response.status == 503: boto.log.debug( 'received %d response, retrying in %d seconds' % (response.status, 2**i)) body = response.read() elif response.status == 408: body = response.read() print '-------------------------' print ' 4 0 8 ' print 'path=%s' % path print body print '-------------------------' elif response.status < 300 or response.status >= 400 or \ not location: self.put_http_connection(host, self.is_secure, connection) return response else: scheme, host, path, params, query, fragment = \ urlparse.urlparse(location) if query: path += '?' 
+ query boto.log.debug('Redirecting: %s' % scheme + '://' + host + path) connection = self.get_http_connection( host, scheme == 'https') continue except KeyboardInterrupt: sys.exit('Keyboard Interrupt') except self.http_exceptions, e: for unretryable in self.http_unretryable_exceptions: if isinstance(e, unretryable): boto.log.debug( 'encountered unretryable %s exception, re-raising' % e.__class__.__name__) raise e boto.log.debug('encountered %s exception, reconnecting' % \ e.__class__.__name__) connection = self.new_http_connection(host, self.is_secure) time.sleep(2**i) i += 1
def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, path='/', provider='aws'):
    """
    Record connection settings, read the relevant boto config options,
    build the credential provider and look up the auth handler.

    :type host: str
    :param host: The host to make the connection to

    :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
        Amazon). If none is specified, the value in your
        ``AWS_ACCESS_KEY_ID`` environmental variable is used.
    :keyword str aws_secret_access_key: Your AWS Secret Access Key
        (provided by Amazon). If none is specified, the value in your
        ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch.  The factory should have
        a similar interface to L{httplib.HTTPSConnection}.

    :param str proxy: Address/hostname for a proxy server

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: str
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: str
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: int
    :param port: The port to use to connect

    :type provider: str
    :param provider: Name handed to ``Provider`` together with the two
        credential arguments.

    Raises BotoClientError when certificate validation is enabled in the
    config but HAVE_HTTPS_CONNECTION is false.
    """
    self.num_retries = 5

    # A value in the config file takes precedence over the is_secure
    # argument.
    if config.has_option('Boto', 'is_secure'):
        is_secure = config.getboolean('Boto', 'is_secure')
    self.is_secure = is_secure

    # Server certificate validation is off unless the config enables it.
    # At some point in the future the default should be flipped to true.
    self.https_validate_certificates = config.getbool(
        'Boto', 'https_validate_certificates', False)
    if self.https_validate_certificates and not HAVE_HTTPS_CONNECTION:
        raise BotoClientError(
            "SSL server certificate validation is enabled in boto "
            "configuration, but Python dependencies required to "
            "support this feature are not available. Certificate "
            "validation is only supported when running under Python "
            "2.6 or later.")
    self.ca_certificates_file = config.get_value(
        'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE)

    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

    # Exceptions (from httplib and socket) that are caught and retried.
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)

    # Subclasses of the above that must not be retried.
    unretryable = []
    if HAVE_HTTPS_CONNECTION:
        unretryable.append(ssl.SSLError)
        unretryable.append(https_connection.InvalidCertificateException)
    self.http_unretryable_exceptions = unretryable

    # errno values carried by socket exceptions that must not be caught.
    self.socket_exception_values = (errno.EINTR,)

    if https_connection_factory is None:
        self.https_connection_factory = None
    else:
        # The pair is (factory, extra exceptions to treat as retryable).
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]

    self.protocol = 'https' if is_secure else 'http'
    self.host = host
    self.path = path
    # An explicit non-zero debug level wins over the config file.
    self.debug = debug if debug else config.getint('Boto', 'debug', debug)
    self.port = port if port else PORTS_BY_SECURITY[is_secure]

    # Timeout used to tell httplib how long to wait for socket timeouts.
    # By default no timeout is passed, which in turn results in the
    # socket's default global timeout being used.  To specify a timeout,
    # set http_socket_timeout in Boto config.  Regardless, timeouts are
    # only applied if Python is 2.6 or greater.
    self.http_connection_kwargs = {}
    if sys.version_info[:2] >= (2, 6):
        if config.has_option('Boto', 'http_socket_timeout'):
            socket_timeout = config.getint('Boto', 'http_socket_timeout')
            self.http_connection_kwargs['timeout'] = socket_timeout

    self.provider = Provider(provider, aws_access_key_id,
                             aws_secret_access_key)

    # A host supplied by the provider replaces the host argument.
    if self.provider.host:
        self.host = self.provider.host

    # cache up to 20 connections per host, up to 20 hosts
    self._pool = ConnectionPool(20, 20)
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None

    # Note: the handler lookup receives the host argument, not self.host.
    self._auth_handler = auth.get_auth_handler(
        host, config, self.provider, self._required_auth_capability())
def ResumableThreshold():
    """Return the 'resumable_threshold' option of the 'GSUtil' config section.

    TWO_MB is passed to ``config.getint`` as the default value.
    """
    default_threshold = TWO_MB
    return config.getint('GSUtil', 'resumable_threshold', default_threshold)
def _mexe(self, request, sender=None, override_num_retries=None,
          retry_handler=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
           transient Internet errors by simply trying again.
           Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param request: Object providing method, path, body, headers, host,
        params and an authorize() method; host and path are overwritten
        when a redirect is followed.
    :param sender: Optional callable invoked as
        sender(connection, method, path, body, headers) in place of the
        default request/getresponse pair; its return value is used as
        the response.
    :param override_num_retries: When not None, used instead of the
        'num_retries' config value (whose default is self.num_retries).
    :param retry_handler: Optional callable invoked as
        retry_handler(response, i, next_sleep). A truthy result must
        unpack to (msg, i, next_sleep) and triggers a sleep and retry.
    :return: The response, once its status is below 300, at least 400
        (other than 500/503), or it carries no location header. When the
        attempts are used up the loop just ends; the code shown here
        neither returns a value nor raises at that point.
    """
    boto.log.debug("Method: %s" % request.method)
    boto.log.debug("Path: %s" % request.path)
    boto.log.debug("Data: %s" % request.body)
    boto.log.debug("Headers: %s" % request.headers)
    boto.log.debug("Host: %s" % request.host)
    boto.log.debug("Params: %s" % request.params)
    response = None
    body = None
    e = None
    if override_num_retries is None:
        num_retries = config.getint("Boto", "num_retries", self.num_retries)
    else:
        num_retries = override_num_retries
    i = 0
    connection = self.get_http_connection(request.host, self.is_secure)
    while i <= num_retries:
        # Use binary exponential backoff to desynchronize client requests.
        next_sleep = random.random() * (2 ** i)
        try:
            # we now re-sign each request before it is retried
            boto.log.debug("Token: %s" % self.provider.security_token)
            request.authorize(connection=self)
            if callable(sender):
                response = sender(connection, request.method, request.path,
                                  request.body, request.headers)
            else:
                connection.request(request.method, request.path,
                                   request.body, request.headers)
                response = connection.getresponse()
            location = response.getheader("location")
            # -- gross hack --
            # httplib gets confused with chunked responses to HEAD requests
            # so I have to fake it out
            if request.method == "HEAD" and getattr(response, "chunked", False):
                response.chunked = 0
            if callable(retry_handler):
                # The handler may replace both the attempt counter and the
                # sleep interval; this path skips the increment at the
                # bottom of the loop.
                status = retry_handler(response, i, next_sleep)
                if status:
                    msg, i, next_sleep = status
                    if msg:
                        boto.log.debug(msg)
                    time.sleep(next_sleep)
                    continue
            if response.status == 500 or response.status == 503:
                # Server error: read the body, then fall through to the
                # sleep and increment at the bottom of the loop.
                msg = "Received %d response. " % response.status
                msg += "Retrying in %3.1f seconds" % next_sleep
                boto.log.debug(msg)
                body = response.read()
            elif response.status < 300 or response.status >= 400 or not location:
                # Final answer: hand the connection back and return.
                self.put_http_connection(request.host, self.is_secure,
                                         connection)
                return response
            else:
                # 3xx with a location header: retarget the request and
                # try again without sleeping or counting an attempt.
                scheme, request.host, request.path, params, query, fragment = urlparse.urlparse(location)
                if query:
                    request.path += "?" + query
                msg = "Redirecting: %s" % scheme + "://"
                msg += request.host + request.path
                boto.log.debug(msg)
                connection = self.get_http_connection(request.host,
                                                      scheme == "https")
                # Forget the redirect response before the next attempt.
                response = None
                continue
        except self.http_exceptions, e:
            for unretryable in self.http_unretryable_exceptions:
                if isinstance(e, unretryable):
                    boto.log.debug("encountered unretryable %s exception, re-raising" % e.__class__.__name__)
                    raise e
            boto.log.debug("encountered %s exception, reconnecting" % e.__class__.__name__)
            # Replace the connection before the next attempt.
            connection = self.new_http_connection(request.host,
                                                  self.is_secure)
        time.sleep(next_sleep)
        i += 1
def __init__(self, host, aws_access_key_id=None,
             aws_secret_access_key=None, is_secure=True, port=None,
             proxy=None, proxy_port=None, proxy_user=None, proxy_pass=None,
             debug=0, https_connection_factory=None, path='/',
             provider='aws'):
    """
    Set up connection state: security and certificate settings, proxy,
    retryable exception types, host/port, socket timeout, credentials
    provider, connection pool and auth handler.

    :type host: str
    :param host: The host to make the connection to

    :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
        Amazon). If none is specified, the value in your
        ``AWS_ACCESS_KEY_ID`` environmental variable is used.

    :keyword str aws_secret_access_key: Your AWS Secret Access Key
        (provided by Amazon). If none is specified, the value in your
        ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL.  An
        ``is_secure`` option in the ``Boto`` config section overrides it.

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection factory
        and the exceptions to catch.  The factory should have a similar
        interface to L{httplib.HTTPSConnection}.

    :param str proxy: Address/hostname for a proxy server

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: str
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: str
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: int
    :param port: The port to use to connect

    :type debug: int
    :param debug: Debug level; when falsy the ``debug`` option of the
        ``Boto`` config section is used instead.

    :type path: str
    :param path: Stored as ``self.path``.

    :param provider: Passed to ``Provider`` together with the two keys.

    :raises BotoClientError: if certificate validation is enabled in the
        config but ``HAVE_HTTPS_CONNECTION`` is false.
    """
    self.num_retries = 5

    # A value in the config file wins over the is_secure argument.
    if config.has_option('Boto', 'is_secure'):
        is_secure = config.getboolean('Boto', 'is_secure')
    self.is_secure = is_secure

    # Server certificate validation is off unless the config enables it.
    # At some point in the future the default should be flipped to true.
    self.https_validate_certificates = config.getbool(
        'Boto', 'https_validate_certificates', False)
    validation_unsupported = (self.https_validate_certificates and
                              not HAVE_HTTPS_CONNECTION)
    if validation_unsupported:
        raise BotoClientError(
            "SSL server certificate validation is enabled in boto "
            "configuration, but Python dependencies required to "
            "support this feature are not available. Certificate "
            "validation is only supported when running under Python "
            "2.6 or later.")
    self.ca_certificates_file = config.get_value(
        'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE)

    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

    # httplib/socket exceptions that are caught and retried ...
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # ... and the subclasses of those that must not be retried.
    self.http_unretryable_exceptions = []
    if HAVE_HTTPS_CONNECTION:
        never_retry = self.http_unretryable_exceptions
        never_retry.append(ssl.SSLError)
        never_retry.append(https_connection.InvalidCertificateException)

    # Values in socket exceptions we don't want to catch.
    self.socket_exception_values = (errno.EINTR,)

    if https_connection_factory is None:
        self.https_connection_factory = None
    else:
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions = (self.http_exceptions +
                                https_connection_factory[1])

    self.protocol = 'https' if is_secure else 'http'
    self.host = host
    self.path = path
    self.debug = debug if debug else config.getint('Boto', 'debug', debug)
    self.port = port or PORTS_BY_SECURITY[is_secure]

    # Socket timeout handed to httplib.  Left unset by default, so the
    # socket module's global default applies; set http_socket_timeout in
    # the Boto config to choose one.  Only applied on Python 2.6+.
    self.http_connection_kwargs = {}
    if (sys.version_info[:2] >= (2, 6) and
            config.has_option('Boto', 'http_socket_timeout')):
        self.http_connection_kwargs['timeout'] = config.getint(
            'Boto', 'http_socket_timeout')

    self.provider = Provider(provider, aws_access_key_id,
                             aws_secret_access_key)
    # A host configured on the provider replaces the default host.
    provider_host = self.provider.host
    if provider_host:
        self.host = provider_host

    self._pool = ConnectionPool()
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None
    self._auth_handler = auth.get_auth_handler(
        host, config, self.provider, self._required_auth_capability())
def send_file(self, key, fp, headers, cb=None, num_cb=10):
    """
    Upload a file to a key into a bucket on GS, using GS resumable upload
    protocol.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: The Key object to which data is to be uploaded

    :type fp: file-like object
    :param fp: The file pointer to upload.  It must be seekable: its
        length is measured by seeking to the end, and it is rewound to
        offset 0 before the upload starts.

    :type headers: dict
    :param headers: The headers to pass along with the PUT request

    :type cb: function
    :param cb: a callback function that will be called to report progress
        on the upload.  The callback should accept two integer parameters,
        the first representing the number of bytes that have been
        successfully transmitted to GS, and the second representing the
        total number of bytes that need to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter, this parameter determines the granularity of the
        callback by defining the maximum number of times the callback will
        be called during the file transfer.  Providing a negative integer
        will cause your callback to be called with each buffer read.

    Raises ResumableUploadException if a problem occurs during the
    transfer.
    """
    if not headers:
        headers = {}

    fp.seek(0, os.SEEK_END)
    file_length = fp.tell()
    fp.seek(0)
    debug = key.bucket.connection.debug

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 5.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 5)
    progress_less_iterations = 0

    # NOTE(review): nothing visible in this loop consumes
    # progress_less_iterations, server_had_bytes_before_attempt or
    # self.num_retries, so a retryable failure is retried immediately and
    # without bound.  Confirm whether the progress check and backoff are
    # missing here.
    while True:  # Retry as long as we're making progress.
        server_had_bytes_before_attempt = self.server_has_bytes
        try:
            etag = self._attempt_resumable_upload(key, fp, file_length,
                                                  headers, cb, num_cb)
            # Upload succeeded, so remove the tracker file (if have one).
            self._remove_tracker_file()
            self._check_final_md5(key, etag)
            if debug >= 1:
                print('Resumable upload complete.')
            return
        except self.RETRYABLE_EXCEPTIONS as e:
            if debug >= 1:
                print('Caught exception (%s)' % repr(e))
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                # Broken pipe error causes httplib to immediately
                # close the socket (http://bugs.python.org/issue5542),
                # so we need to close the connection before we resume
                # the upload (which will cause a new connection to be
                # opened the next time an HTTP request is sent).
                key.bucket.connection.connection.close()
        except ResumableUploadException as e:
            if e.disposition == ResumableTransferDisposition.ABORT_CUR_PROCESS:
                if debug >= 1:
                    print('Caught non-retryable ResumableUploadException '
                          '(%s); aborting but retaining tracker file' %
                          e.message)
                raise
            elif e.disposition == ResumableTransferDisposition.ABORT:
                if debug >= 1:
                    print('Caught non-retryable ResumableUploadException '
                          '(%s); aborting and removing tracker file' %
                          e.message)
                self._remove_tracker_file()
                raise
            else:
                # Any other disposition is treated as retryable.
                if debug >= 1:
                    print('Caught ResumableUploadException (%s) - will '
                          'retry' % e.message)
def __init__(self, host, aws_access_key_id=None,
             aws_secret_access_key=None, is_secure=True, port=None,
             proxy=None, proxy_port=None, proxy_user=None, proxy_pass=None,
             debug=0, https_connection_factory=None, path='/',
             provider='aws', security_token=None,
             suppress_consec_slashes=True, validate_certs=True,
             profile_name=None):
    """
    Set up connection state: security and certificate settings, port,
    proxy, retryable exception types, socket timeout, credentials
    provider, connection pool, auth handler and request hook.

    :type host: str
    :param host: The host to make the connection to

    :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
        Amazon). If none is specified, the value in your
        ``AWS_ACCESS_KEY_ID`` environmental variable is used.

    :keyword str aws_secret_access_key: Your AWS Secret Access Key
        (provided by Amazon). If none is specified, the value in your
        ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.

    :keyword str security_token: The security token associated with
        temporary credentials issued by STS.  Optional unless using
        temporary credentials.  If none is specified, the environment
        variable ``AWS_SECURITY_TOKEN`` is used if defined.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL.  An
        ``is_secure`` option in the ``Boto`` config section overrides it.

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection factory
        and the exceptions to catch.  The factory should have a similar
        interface to L{http_client.HTTPSConnection}.

    :param str proxy: Address/hostname for a proxy server

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: str
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: str
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: int
    :param port: The port to use to connect

    :type debug: int
    :param debug: Fallback debug level, used when the ``Boto`` config
        section has no ``debug`` option.  Non-integer values are replaced
        by 0.

    :type suppress_consec_slashes: bool
    :param suppress_consec_slashes: If provided, controls whether
        consecutive slashes will be suppressed in key paths.

    :type validate_certs: bool
    :param validate_certs: Controls whether SSL certificates will be
        validated or not.  Defaults to True.

    :type profile_name: str
    :param profile_name: Override usual Credentials section in config
        file to use a named set of keys instead.

    :raises BotoClientError: if certificate validation is enabled but
        ``HAVE_HTTPS_CONNECTION`` is false.
    """
    self.suppress_consec_slashes = suppress_consec_slashes
    self.num_retries = 6

    # A value in the config file wins over the is_secure argument.
    if config.has_option('Boto', 'is_secure'):
        is_secure = config.getboolean('Boto', 'is_secure')
    self.is_secure = is_secure

    # Certificates are validated by default.  The boto config file can
    # override that, or callers can pass an explicit validate_certs.
    self.https_validate_certificates = config.getbool(
        'Boto', 'https_validate_certificates', validate_certs)
    validation_unsupported = (self.https_validate_certificates and
                              not HAVE_HTTPS_CONNECTION)
    if validation_unsupported:
        raise BotoClientError(
            "SSL server certificate validation is enabled in boto "
            "configuration, but Python dependencies required to "
            "support this feature are not available. Certificate "
            "validation is only supported when running under Python "
            "2.6 or later.")

    # The literal value 'system' is stored as None.
    configured_certs = config.get_value(
        'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE)
    self.ca_certificates_file = (
        None if configured_certs == 'system' else configured_certs)

    self.port = port or PORTS_BY_SECURITY[is_secure]

    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

    # http_client/socket exceptions that are caught and retried ...
    self.http_exceptions = (http_client.HTTPException, socket.error,
                            socket.gaierror, http_client.BadStatusLine)
    # ... and the subclasses of those that must not be retried.
    self.http_unretryable_exceptions = []
    if HAVE_HTTPS_CONNECTION:
        self.http_unretryable_exceptions.append(
            https_connection.InvalidCertificateException)

    # Values in socket exceptions we don't want to catch.
    self.socket_exception_values = (errno.EINTR,)

    if https_connection_factory is None:
        self.https_connection_factory = None
    else:
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions = (self.http_exceptions +
                                https_connection_factory[1])

    self.protocol = 'https' if is_secure else 'http'
    self.host = host
    self.path = path

    # Only an integer debug argument is accepted as the fallback level.
    fallback_debug = debug if isinstance(debug, six.integer_types) else 0
    self.debug = config.getint('Boto', 'debug', fallback_debug)
    self.host_header = None

    # Socket timeout handed to http_client.  If http_socket_timeout is not
    # defined in the boto config file, use the 70 second default
    # recommended by
    # http://docs.aws.amazon.com/amazonswf/latest/apireference/API_PollForActivityTask.html
    # Only applied on Python 2.6+.
    self.http_connection_kwargs = {}
    if sys.version_info[:2] >= (2, 6):
        self.http_connection_kwargs['timeout'] = config.getint(
            'Boto', 'http_socket_timeout', 70)

    # A ready-made Provider instance is used as is; anything else is
    # treated as a provider type and wrapped.
    if not isinstance(provider, Provider):
        self._provider_type = provider
        provider = Provider(self._provider_type, aws_access_key_id,
                            aws_secret_access_key, security_token,
                            profile_name)
    self.provider = provider

    # Host, port and host header set on the provider replace the defaults.
    if self.provider.host:
        self.host = self.provider.host
    if self.provider.port:
        self.port = self.provider.port
    if self.provider.host_header:
        self.host_header = self.provider.host_header

    self._pool = ConnectionPool()
    self._connection = (self.host, self.port, self.is_secure)
    self._last_rs = None
    self._auth_handler = auth.get_auth_handler(
        host, config, self.provider, self._required_auth_capability())

    service_name = getattr(self, 'AuthServiceName', None)
    if service_name is not None:
        self.auth_service_name = service_name
    self.request_hook = None
def GetMaxRetryDelay():
    """Looks up the maximum retry delay in the boto config.

    Returns:
      The 'max_retry_delay' option of the 'Boto' section read as an int,
      with 60 passed as the fallback value.
    """
    max_retry_delay = config.getint('Boto', 'max_retry_delay', 60)
    return max_retry_delay
def ResumableThreshold():
    """Looks up the resumable-transfer threshold in the boto config.

    Returns:
      The 'resumable_threshold' option of the 'GSUtil' section read as an
      int, with EIGHT_MIB passed as the fallback value.
    """
    threshold = config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
    return threshold
def _mexe(self, method, path, data, headers, host=None, sender=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
    transient Internet errors by simply trying again.
    Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param method: HTTP method passed to the connection (or ``sender``).
    :param path: Request path; replaced by the redirect target when a
        redirect is followed.
    :param data: Request body.
    :param headers: Request headers.
    :param host: Host used to obtain the connection; replaced by the
        redirect target when a redirect is followed.

    :type sender: callable
    :param sender: Optional.  When callable it is invoked as
        ``sender(connection, method, path, data, headers)`` and must
        return the response; otherwise the request is issued directly on
        the connection.

    :return: The first response that is not a 500/503/408 and is either
        not a redirect (status < 300 or >= 400) or has no ``location``
        header.

    A ``KeyboardInterrupt`` during an attempt exits the process through
    ``sys.exit``.  Exceptions in ``self.http_exceptions`` trigger a
    reconnect and another attempt.
    """
    boto.log.debug('Method: %s' % method)
    boto.log.debug('Path: %s' % path)
    boto.log.debug('Data: %s' % data)
    boto.log.debug('Headers: %s' % headers)
    boto.log.debug('Host: %s' % host)
    response = None
    body = None
    num_retries = config.getint('Boto', 'num_retries', self.num_retries)
    i = 0
    connection = self.get_http_connection(host, self.is_secure)
    while i <= num_retries:
        try:
            if callable(sender):
                response = sender(connection, method, path, data, headers)
            else:
                connection.request(method, path, data, headers)
                response = connection.getresponse()
            location = response.getheader('location')
            # -- gross hack --
            # httplib gets confused with chunked responses to HEAD requests
            # so I have to fake it out.  getattr is used because a
            # response produced by ``sender`` may not expose ``chunked``.
            if method == 'HEAD' and getattr(response, 'chunked', False):
                response.chunked = 0
            if response.status == 500 or response.status == 503:
                boto.log.debug('received %d response, retrying in %d seconds'
                               % (response.status, 2**i))
                body = response.read()
            elif response.status == 408:
                # Dump the 408 response to stdout, then fall through to
                # the sleep and retry below.
                body = response.read()
                print('-------------------------')
                print(' 4 0 8 ')
                print('path=%s' % path)
                print(body)
                print('-------------------------')
            elif response.status < 300 or response.status >= 400 or \
                    not location:
                return response
            else:
                # Redirect: retarget host/path and try again.
                # NOTE(review): this path does not increment i, so an
                # endless redirect chain is not bounded by num_retries.
                scheme, host, path, params, query, fragment = \
                    urlparse.urlparse(location)
                if query:
                    path += '?' + query
                boto.log.debug('Redirecting: %s' % scheme + '://' +
                               host + path)
                connection = self.get_http_connection(host,
                                                      scheme == 'https')
                continue
        except KeyboardInterrupt:
            sys.exit('Keyboard Interrupt')
        except self.http_exceptions as e:
            boto.log.debug('encountered %s exception, reconnecting' %
                           e.__class__.__name__)
            connection = self.refresh_http_connection(host, self.is_secure)
        # Deterministic exponential backoff between attempts.
        time.sleep(2**i)
        i += 1
    # NOTE(review): when every attempt fails, control falls off the end
    # here and None is returned; confirm whether an error should be raised.
def _mexe(self, request, sender=None, override_num_retries=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
    transient Internet errors by simply trying again.
    Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param request: The request to send.  Its ``method``, ``path``,
        ``body``, ``headers`` and ``host`` attributes are read,
        ``authorize(connection=self)`` is called before every attempt, and
        ``host``/``path`` are overwritten when a redirect is followed.

    :type sender: callable
    :param sender: Optional.  When callable it is invoked as
        ``sender(connection, method, path, body, headers)`` and must
        return the response; otherwise the request is issued directly on
        the connection.

    :type override_num_retries: int
    :param override_num_retries: When not None, used instead of the
        ``num_retries`` option of the ``Boto`` config section (whose
        fallback is ``self.num_retries``).

    :return: The first response that is not a 500/503 and is either not a
        redirect (status < 300 or >= 400) or has no ``location`` header.

    :raises: Any exception listed in ``self.http_unretryable_exceptions``
        is re-raised unchanged.  Other ``self.http_exceptions`` trigger a
        reconnect and another attempt.  A ``KeyboardInterrupt`` during an
        attempt exits the process through ``sys.exit``.
    """
    boto.log.debug('Method: %s' % request.method)
    boto.log.debug('Path: %s' % request.path)
    boto.log.debug('Data: %s' % request.body)
    boto.log.debug('Headers: %s' % request.headers)
    boto.log.debug('Host: %s' % request.host)
    response = None
    body = None
    if override_num_retries is None:
        num_retries = config.getint('Boto', 'num_retries', self.num_retries)
    else:
        num_retries = override_num_retries
    i = 0
    connection = self.get_http_connection(request.host, self.is_secure)
    while i <= num_retries:
        try:
            # we now re-sign each request before it is retried
            request.authorize(connection=self)
            if callable(sender):
                response = sender(connection, request.method, request.path,
                                  request.body, request.headers)
            else:
                connection.request(request.method, request.path,
                                   request.body, request.headers)
                response = connection.getresponse()
            location = response.getheader('location')
            # -- gross hack --
            # httplib gets confused with chunked responses to HEAD requests
            # so I have to fake it out
            if request.method == 'HEAD' and getattr(response, 'chunked',
                                                    False):
                response.chunked = 0
            if response.status == 500 or response.status == 503:
                boto.log.debug('received %d response, retrying in %d seconds'
                               % (response.status, 2 ** i))
                body = response.read()
            elif response.status < 300 or response.status >= 400 or \
                    not location:
                self.put_http_connection(request.host, self.is_secure,
                                         connection)
                return response
            else:
                # Redirect: point the request at the new location and try
                # again.  NOTE(review): this path does not increment i, so
                # an endless redirect chain is not bounded by num_retries.
                scheme, request.host, request.path, params, query, \
                    fragment = urlparse.urlparse(location)
                if query:
                    request.path += '?' + query
                boto.log.debug('Redirecting: %s' % scheme + '://' +
                               request.host + request.path)
                connection = self.get_http_connection(request.host,
                                                      scheme == 'https')
                continue
        except KeyboardInterrupt:
            sys.exit('Keyboard Interrupt')
        except self.http_exceptions as e:
            for unretryable in self.http_unretryable_exceptions:
                if isinstance(e, unretryable):
                    boto.log.debug(
                        'encountered unretryable %s exception, re-raising' %
                        e.__class__.__name__)
                    # Bare raise keeps the original traceback intact.
                    raise
            boto.log.debug('encountered %s exception, reconnecting' %
                           e.__class__.__name__)
            connection = self.new_http_connection(request.host,
                                                  self.is_secure)
        # Deterministic exponential backoff between attempts.
        time.sleep(2 ** i)
        i += 1
    # NOTE(review): when every attempt fails, control falls off the end
    # here and None is returned; ``body`` is never consumed.  Confirm
    # whether a final error should be raised instead.
def __init__(self, server, aws_access_key_id=None,
             aws_secret_access_key=None, is_secure=True, port=None,
             proxy=None, proxy_port=None, proxy_user=None, proxy_pass=None,
             debug=0, https_connection_factory=None):
    """
    Set up connection state: proxy, retryable exception types, server
    name and port, credentials, signing HMACs and the connection cache.

    @type server: string
    @param server: The server to make the connection to

    @type aws_access_key_id: string
    @param aws_access_key_id: AWS Access Key ID (provided by Amazon).
                              Falls back to the AWS_ACCESS_KEY_ID
                              environment variable, then to the
                              'aws_access_key_id' option of the
                              'Credentials' config section.

    @type aws_secret_access_key: string
    @param aws_secret_access_key: Secret Access Key (provided by Amazon).
                                  Falls back to the AWS_SECRET_ACCESS_KEY
                                  environment variable, then to the
                                  'aws_secret_access_key' option of the
                                  'Credentials' config section.

    @type is_secure: boolean
    @param is_secure: Whether the connection is over SSL

    @type https_connection_factory: list or tuple
    @param https_connection_factory: A pair of an HTTP connection
                                     factory and the exceptions to catch.
                                     The factory should have a similar
                                     interface to L{httplib.HTTPSConnection}.

    @type proxy: string
    @param proxy: Address/hostname for a proxy server

    @type proxy_port: int
    @param proxy_port: The port to use when connecting over a proxy

    @type proxy_user: string
    @param proxy_user: The username to connect with on the proxy

    @type proxy_pass: string
    @param proxy_pass: The password to use when connection over a proxy.

    @type port: integer
    @param port: The port to use to connect

    @type debug: integer
    @param debug: Debug level; when falsy the 'debug' option of the
                  'Boto' config section is used instead.
    """
    self.num_retries = 5
    self.is_secure = is_secure
    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
    # define exceptions from httplib that we want to catch and retry
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # define values in socket exceptions we don't want to catch
    self.socket_exception_values = (errno.EINTR,)
    if https_connection_factory is not None:
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]
    else:
        self.https_connection_factory = None
    if is_secure:
        self.protocol = 'https'
    else:
        self.protocol = 'http'
    self.server = server
    if debug:
        self.debug = debug
    else:
        self.debug = config.getint('Boto', 'debug', debug)
    if port:
        self.port = port
    else:
        self.port = PORTS_BY_SECURITY[is_secure]

    # The port is left out of the server name only for port 80.
    if self.port == 80:
        self.server_name = server
    else:
        self.server_name = '%s:%d' % (server, self.port)

    # Credential lookup order: explicit argument, environment, config.
    # NOTE(review): when none of the three supplies a value the attribute
    # is never set, so the hmac setup below fails with AttributeError.
    if aws_access_key_id:
        self.aws_access_key_id = aws_access_key_id
    elif 'AWS_ACCESS_KEY_ID' in os.environ:
        self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
    elif config.has_option('Credentials', 'aws_access_key_id'):
        self.aws_access_key_id = config.get('Credentials',
                                            'aws_access_key_id')

    if aws_secret_access_key:
        self.aws_secret_access_key = aws_secret_access_key
    elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
        self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
    elif config.has_option('Credentials', 'aws_secret_access_key'):
        self.aws_secret_access_key = config.get('Credentials',
                                                'aws_secret_access_key')

    # initialize an HMAC for signatures, make copies with each request
    self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
    if sha256:
        self.hmac_256 = hmac.new(self.aws_secret_access_key,
                                 digestmod=sha256)
    else:
        self.hmac_256 = None

    # cache up to 20 connections
    self._cache = boto.utils.LRUCache(20)
    self.refresh_http_connection(self.server, self.is_secure)
    self._last_rs = None
def _mexe(self, request, sender=None, override_num_retries=None,
          retry_handler=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
    transient Internet errors by simply trying again.
    Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param request: The request to send.  Its ``method``, ``path``,
        ``body``, ``headers``, ``host``, ``port`` and ``params``
        attributes are read; ``body`` is encoded to UTF-8 bytes when it is
        not already bytes and has an ``encode`` method;
        ``authorize(connection=self)`` is called before every attempt;
        ``start_time`` is set before each send; ``host``/``port``/``path``
        are overwritten when a redirect is followed.

    :type sender: callable
    :param sender: Optional.  When callable it is invoked as
        ``sender(connection, method, path, body, headers)`` and must
        return the response; otherwise the request is issued directly on
        the connection.

    :type override_num_retries: int
    :param override_num_retries: When not None, used instead of the
        ``num_retries`` option of the ``Boto`` config section (whose
        fallback is ``self.num_retries``).

    :type retry_handler: callable
    :param retry_handler: Optional.  Called as
        ``retry_handler(response, i, next_sleep)``.  A truthy result must
        be a ``(msg, i, next_sleep)`` triple; the loop then logs ``msg``
        (if any), sleeps ``next_sleep`` seconds and retries with the
        returned counter.

    :return: The first response that is not a 500/502/503/504 and is
        either not a redirect (status < 300 or >= 400) or has no
        ``location`` header.

    :raises BotoServerError: if retries are exhausted and a response is
        available.
    :raises BotoClientError: if retries are exhausted with neither a
        response nor a caught exception.
    :raises: Any exception listed in ``self.http_unretryable_exceptions``
        is re-raised immediately; after the retries are exhausted without
        a response, the last caught exception is raised.
    """
    boto.log.debug('Method: %s' % request.method)
    boto.log.debug('Path: %s' % request.path)
    boto.log.debug('Data: %s' % request.body)
    boto.log.debug('Headers: %s' % request.headers)
    boto.log.debug('Host: %s' % request.host)
    boto.log.debug('Port: %s' % request.port)
    boto.log.debug('Params: %s' % request.params)
    response = None
    body = None
    # Last exception caught by a handler below.  It is kept under its own
    # name because the ``as e`` target is unbound once a handler finishes
    # on Python 3.
    last_exc = None
    if override_num_retries is None:
        num_retries = config.getint('Boto', 'num_retries', self.num_retries)
    else:
        num_retries = override_num_retries
    i = 0
    connection = self.get_http_connection(request.host, request.port,
                                          self.is_secure)

    # Convert body to bytes if needed
    if not isinstance(request.body, bytes) and hasattr(request.body,
                                                       'encode'):
        request.body = request.body.encode('utf-8')

    # Read as an int so the cap compares numerically with the computed
    # delay; a plain get() hands back a string when the option is set.
    max_retry_delay = boto.config.getint('Boto', 'max_retry_delay', 60)

    while i <= num_retries:
        # Use binary exponential backoff to desynchronize client requests.
        next_sleep = min(random.random() * (2 ** i), max_retry_delay)
        try:
            # we now re-sign each request before it is retried
            boto.log.debug('Token: %s' % self.provider.security_token)
            request.authorize(connection=self)
            # Only force header for non-s3 connections, because s3 uses
            # an older signing method + bucket resource URLs that include
            # the port info. All others should be now be up to date and
            # not include the port.
            if 's3' not in self._required_auth_capability():
                if not getattr(self, 'anon', False):
                    self.set_host_header(request)
            boto.log.debug('Final headers: %s' % request.headers)
            request.start_time = datetime.now()
            if callable(sender):
                response = sender(connection, request.method, request.path,
                                  request.body, request.headers)
            else:
                connection.request(request.method, request.path,
                                   request.body, request.headers)
                response = connection.getresponse()
            boto.log.debug('Response headers: %s' % response.getheaders())
            location = response.getheader('location')
            # -- gross hack --
            # http_client gets confused with chunked responses to HEAD
            # requests so I have to fake it out
            if request.method == 'HEAD' and getattr(response, 'chunked',
                                                    False):
                response.chunked = 0
            if callable(retry_handler):
                status = retry_handler(response, i, next_sleep)
                if status:
                    # The handler decides both the next counter value and
                    # how long to wait.
                    msg, i, next_sleep = status
                    if msg:
                        boto.log.debug(msg)
                    time.sleep(next_sleep)
                    continue
            if response.status in [500, 502, 503, 504]:
                msg = 'Received %d response. ' % response.status
                msg += 'Retrying in %3.1f seconds' % next_sleep
                boto.log.debug(msg)
                body = response.read()
                if isinstance(body, bytes):
                    body = body.decode('utf-8')
            elif response.status < 300 or response.status >= 400 or \
                    not location:
                # don't return connection to the pool if response contains
                # Connection:close header, because the connection has been
                # closed and default reconnect behavior may do something
                # different than new_http_connection. Also, it's probably
                # less efficient to try to reuse a closed connection.
                conn_header_value = response.getheader('connection')
                if conn_header_value == 'close':
                    connection.close()
                else:
                    self.put_http_connection(request.host, request.port,
                                             self.is_secure, connection)
                if self.request_hook is not None:
                    self.request_hook.handle_request_data(request, response)
                return response
            else:
                scheme, request.host, request.path, \
                    params, query, fragment = urlparse(location)
                if query:
                    request.path += '?' + query
                # urlparse can return both host and port in netloc, so if
                # that's the case we need to split them up properly
                if ':' in request.host:
                    request.host, request.port = request.host.split(':', 1)
                msg = 'Redirecting: %s' % scheme + '://'
                msg += request.host + request.path
                boto.log.debug(msg)
                connection = self.get_http_connection(request.host,
                                                      request.port,
                                                      scheme == 'https')
                response = None
                continue
        except PleaseRetryException as e:
            boto.log.debug('encountered a retry exception: %s' % e)
            connection = self.new_http_connection(request.host,
                                                  request.port,
                                                  self.is_secure)
            response = e.response
            last_exc = e
        except self.http_exceptions as e:
            for unretryable in self.http_unretryable_exceptions:
                if isinstance(e, unretryable):
                    boto.log.debug(
                        'encountered unretryable %s exception, re-raising' %
                        e.__class__.__name__)
                    raise
            boto.log.debug('encountered %s exception, reconnecting' %
                           e.__class__.__name__)
            connection = self.new_http_connection(request.host,
                                                  request.port,
                                                  self.is_secure)
            last_exc = e
        time.sleep(next_sleep)
        i += 1
    # If we made it here, it's because we have exhausted our retries
    # and still haven't succeeded.  So, if we have a response object,
    # use it to raise an exception.
    # Otherwise, raise the exception that must have already happened.
    if self.request_hook is not None:
        self.request_hook.handle_request_data(request, response, error=True)
    if response:
        raise BotoServerError(response.status, response.reason, body)
    elif last_exc is not None:
        # Raised explicitly: a bare ``raise`` has no active exception to
        # re-raise at this point on Python 3.
        raise last_exc
    else:
        msg = 'Please report this exception as a Boto Issue!'
        raise BotoClientError(msg)
def send_file(self, key, fp, headers, cb=None, num_cb=10):
    """
    Upload a file to a key into a bucket on GS, using GS resumable upload
    protocol.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: The Key object to which data is to be uploaded.  On
        success its ``md5`` and ``base64md5`` are set from the checksum
        computed during the upload.

    :type fp: file-like object
    :param fp: The file pointer to upload.  For a ``KeyFile`` the length
        is taken from the wrapped key's ``size``; otherwise ``fp`` must be
        seekable and is rewound to offset 0.

    :type headers: dict
    :param headers: The headers to pass along with the PUT request.  A
        ``Content-Type`` entry whose value is None is removed, and
        ``User-Agent`` is always set.

    :type cb: function
    :param cb: a callback function that will be called to report progress
        on the upload.  The callback should accept two integer parameters,
        the first representing the number of bytes that have been
        successfully transmitted to GS, and the second representing the
        total number of bytes that need to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter, this parameter determines the granularity of the
        callback by defining the maximum number of times the callback will
        be called during the file transfer.  Providing a negative integer
        will cause your callback to be called with each buffer read.

    Raises ResumableUploadException if a problem occurs during the
    transfer.
    """
    if not headers:
        headers = {}
    # If Content-Type header is present and set to None, remove it.
    # This is gsutil's way of asking boto to refrain from auto-generating
    # that header.
    CT = 'Content-Type'
    if CT in headers and headers[CT] is None:
        del headers[CT]

    headers['User-Agent'] = UserAgent

    # Determine file size different ways for case where fp is actually a
    # wrapper around a Key vs an actual file.
    if isinstance(fp, KeyFile):
        file_length = fp.getkey().size
    else:
        fp.seek(0, os.SEEK_END)
        file_length = fp.tell()
        fp.seek(0)
    debug = key.bucket.connection.debug

    # Compute the MD5 checksum on the fly.
    self.md5sum = md5()

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 6.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 6)
    self.progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
        server_had_bytes_before_attempt = self.server_has_bytes
        self.md5sum_before_attempt = self.md5sum.copy()
        try:
            # Save generation and meta_generation in class state so caller
            # can find these values, for use in preconditions of future
            # operations on the uploaded object.
            (etag, self.generation, self.meta_generation) = (
                self._attempt_resumable_upload(key, fp, file_length,
                                               headers, cb, num_cb,
                                               self.md5sum))

            # Get the final md5 for the uploaded content.
            hd = self.md5sum.hexdigest()
            key.md5, key.base64md5 = key.get_md5_from_hexdigest(hd)

            # Upload succeeded, so remove the tracker file (if have one).
            self._remove_tracker_file()
            self._check_final_md5(key, etag)
            if debug >= 1:
                print('Resumable upload complete.')
            return
        except self.RETRYABLE_EXCEPTIONS as e:
            if debug >= 1:
                print('Caught exception (%s)' % repr(e))
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                # Broken pipe error causes httplib to immediately
                # close the socket (http://bugs.python.org/issue5542),
                # so we need to close the connection before we resume
                # the upload (which will cause a new connection to be
                # opened the next time an HTTP request is sent).
                key.bucket.connection.connection.close()
        except ResumableUploadException as e:
            self.handle_resumable_upload_exception(e, debug)

        # We only get here after a retryable failure.  Hand the progress
        # bookkeeping to the class helper so the loop does not retry
        # immediately and without bound.
        # NOTE(review): presumably the helper enforces self.num_retries
        # and backs off between attempts; confirm against its definition.
        self.track_progress_less_iterations(server_had_bytes_before_attempt,
                                            True, debug)
def send_file(self, key, fp, headers, cb=None, num_cb=10):
    """
    Upload a file to a key into a bucket on GS, using GS resumable upload
    protocol.  Failed attempts are retried until one succeeds or an
    exception propagates.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: The Key object to which data is to be uploaded.  On
        success its ``md5`` and ``base64md5`` are set from the checksum
        computed during the upload.

    :type fp: file-like object
    :param fp: The file pointer to upload.  For a ``KeyFile`` the length
        is taken from the wrapped key's ``size``; otherwise ``fp`` must be
        seekable and is rewound to offset 0.

    :type headers: dict
    :param headers: The headers to pass along with the PUT request.  A
        ``Content-Type`` entry whose value is None is removed.

    :type cb: function
    :param cb: a callback function that will be called to report progress
        on the upload.  The callback should accept two integer parameters,
        the first representing the number of bytes that have been
        successfully transmitted to GS, and the second representing the
        total number of bytes that need to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter, this parameter determines the granularity of the
        callback by defining the maximum number of times the callback will
        be called during the file transfer.  Providing a negative integer
        will cause your callback to be called with each buffer read.

    Raises ResumableUploadException if a problem occurs during the
    transfer.
    """
    headers = headers if headers else {}

    # gsutil passes Content-Type=None to ask boto not to auto-generate
    # that header, so such an entry is dropped.
    content_type_key = 'Content-Type'
    if content_type_key in headers and headers[content_type_key] is None:
        del headers[content_type_key]

    # A plain file is measured by seeking to its end; a KeyFile wrapper
    # reports the size of the key it wraps.
    if not isinstance(fp, KeyFile):
        fp.seek(0, os.SEEK_END)
        file_length = fp.tell()
        fp.seek(0)
    else:
        file_length = fp.getkey().size
    debug_level = key.bucket.connection.debug

    # The MD5 checksum is accumulated while the data is sent.
    self.md5sum = md5()

    # A num_retries given to the constructor wins; otherwise use the boto
    # config value, which falls back to 6.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 6)
    self.progress_less_iterations = 0

    # Each pass is one upload attempt; a successful attempt returns.
    while True:
        bytes_on_server_before = self.server_has_bytes
        self.md5sum_before_attempt = self.md5sum.copy()
        try:
            upload_etag = self._attempt_resumable_upload(
                key, fp, file_length, headers, cb, num_cb, self.md5sum)

            # Publish the final md5 of the uploaded content on the key.
            hex_digest = self.md5sum.hexdigest()
            key.md5, key.base64md5 = key.get_md5_from_hexdigest(hex_digest)

            # The upload worked, so the tracker file (if any) can go.
            self._remove_tracker_file()
            self._check_final_md5(key, upload_etag)
            if debug_level >= 1:
                print('Resumable upload complete.')
            return
        except self.RETRYABLE_EXCEPTIONS as e:
            if debug_level >= 1:
                print('Caught exception (%s)' % e.__repr__())
            broken_pipe = isinstance(e, IOError) and e.errno == errno.EPIPE
            if broken_pipe:
                # httplib closes the socket immediately on a broken pipe
                # (http://bugs.python.org/issue5542), so close the
                # connection here; a new one is opened the next time an
                # HTTP request is sent.
                key.bucket.connection.connection.close()
        except ResumableUploadException as e:
            self.handle_resumable_upload_exception(e, debug_level)

        self.track_progress_less_iterations(bytes_on_server_before, True,
                                            debug_level)
def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
             version_id=None):
    """
    Retrieves a file from a Key, retrying for as long as attempts keep
    making progress.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: The Key object from which upload is to be downloaded

    :type fp: file
    :param fp: File pointer into which data should be downloaded

    :type headers: dict
    :param headers: headers to send when retrieving the files

    :type cb: function
    :param cb: (optional) a callback function that will be called to
        report progress on the download. The callback should accept two
        integer parameters, the first representing the number of bytes
        that have been successfully transmitted from the storage service
        and the second representing the total number of bytes that need
        to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter this parameter determines the granularity of the
        callback by defining the maximum number of times the callback
        will be called during the file transfer.

    :type torrent: bool
    :param torrent: Flag for whether to get a torrent for the file

    :type version_id: string
    :param version_id: The version ID (optional)

    Raises ResumableDownloadException if a problem occurs during the
    transfer. This includes the case where more than ``num_retries``
    consecutive attempts fail without the local file growing; that
    exception carries the ABORT_CUR_PROCESS disposition.
    """
    # Imported locally so this method does not rely on a module-level
    # import of time.
    import time

    debug = key.bucket.connection.debug
    if not headers:
        headers = {}

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 5.
    if self.num_retries is None:
        self.num_retries = config.getint('Boto', 'num_retries', 5)
    progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
        had_file_bytes_before_attempt = get_cur_file_size(fp)
        try:
            self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                             torrent, version_id)
            # Download succeeded, so remove the tracker file (if have one).
            self._remove_tracker_file()
            self._check_final_md5(key, fp.name)
            if debug >= 1:
                print('Resumable download complete.')
            return
        except self.RETRYABLE_EXCEPTIONS as e:
            if debug >= 1:
                print('Caught exception (%s)' % e.__repr__())
        except ResumableDownloadException as e:
            if (e.disposition ==
                    ResumableTransferDisposition.ABORT_CUR_PROCESS):
                if debug >= 1:
                    print('Caught non-retryable ResumableDownloadException '
                          '(%s)' % e.message)
                raise
            elif (e.disposition == ResumableTransferDisposition.ABORT):
                if debug >= 1:
                    print('Caught non-retryable ResumableDownloadException '
                          '(%s); aborting and removing tracker file' %
                          e.message)
                self._remove_tracker_file()
                raise
            else:
                if debug >= 1:
                    print('Caught ResumableDownloadException (%s) - will '
                          'retry' % e.message)

        # At this point we had a retryable failure; see if we made
        # progress. Without this check the loop would spin forever, with
        # no delay, against a persistently failing service.
        if get_cur_file_size(fp) > had_file_bytes_before_attempt:
            progress_less_iterations = 0
        else:
            progress_less_iterations += 1

        if progress_less_iterations > self.num_retries:
            # Don't retry any longer in the current process.
            raise ResumableDownloadException(
                'Too many resumable download attempts failed without '
                'progress. You might try this download again later',
                ResumableTransferDisposition.ABORT_CUR_PROCESS)

        # Exponential back-off keyed on the number of stalled attempts.
        sleep_time_secs = 2 ** progress_less_iterations
        if debug >= 1:
            print('Got retryable failure (%d progress-less in a row).\n'
                  'Sleeping %d seconds before re-trying' %
                  (progress_less_iterations, sleep_time_secs))
        time.sleep(sleep_time_secs)
def GetMaxRetryDelay():
    """Returns the 'max_retry_delay' option of the 'Boto' config section.

    The option is read as an integer, with 32 passed to the config lookup
    as the fallback value.
    """
    max_retry_delay = config.getint('Boto', 'max_retry_delay', 32)
    return max_retry_delay
def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, path='/', provider='aws'):
    """
    Initialise the connection settings for ``host``.

    :type host: str
    :param host: The host to make the connection to

    :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
        Amazon). If none is specified, the value in your
        ``AWS_ACCESS_KEY_ID`` environmental variable is used.
    :keyword str aws_secret_access_key: Your AWS Secret Access Key
        (provided by Amazon). If none is specified, the value in your
        ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.

    :type is_secure: boolean
    :param is_secure: Whether the connection is over SSL. An ``is_secure``
        option in the ``Boto`` config section takes precedence over this
        argument.

    :type https_connection_factory: list or tuple
    :param https_connection_factory: A pair of an HTTP connection
        factory and the exceptions to catch.  The factory should have
        a similar interface to L{httplib.HTTPSConnection}.

    :param str proxy: Address/hostname for a proxy server

    :type proxy_port: int
    :param proxy_port: The port to use when connecting over a proxy

    :type proxy_user: str
    :param proxy_user: The username to connect with on the proxy

    :type proxy_pass: str
    :param proxy_pass: The password to use when connection over a proxy.

    :type port: int
    :param port: The port to use to connect. When falsy, the port is
        looked up in ``PORTS_BY_SECURITY`` by the effective ``is_secure``.
    """
    self.num_retries = 5

    # A value defined in the config file wins over the passed-in
    # is_secure argument.
    if config.has_option('Boto', 'is_secure'):
        is_secure = config.getboolean('Boto', 'is_secure')
    self.is_secure = is_secure
    self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

    # httplib/socket exceptions that we want to catch and retry.
    self.http_exceptions = (httplib.HTTPException, socket.error,
                            socket.gaierror)
    # errno values carried by socket exceptions that we don't want to
    # catch.
    self.socket_exception_values = (errno.EINTR,)

    if https_connection_factory is None:
        self.https_connection_factory = None
    else:
        # Element 0 is the factory; element 1 holds extra exception types
        # that extend the retryable set.
        self.https_connection_factory = https_connection_factory[0]
        self.http_exceptions += https_connection_factory[1]

    self.protocol = 'https' if is_secure else 'http'
    self.host = host
    self.path = path
    # An explicit (truthy) debug argument beats the config file.
    self.debug = debug if debug else config.getint('Boto', 'debug', debug)
    self.port = port if port else PORTS_BY_SECURITY[is_secure]

    # Timeout used to tell httplib how long to wait for socket timeouts.
    # By default nothing is passed, which leaves the socket's global
    # default timeout in effect. To specify a timeout, set
    # http_socket_timeout in the Boto config. Either way, the timeout is
    # only applied on Python 2.6 or greater.
    self.http_connection_kwargs = {}
    if sys.version_info[:2] >= (2, 6):
        if config.has_option('Boto', 'http_socket_timeout'):
            socket_timeout = config.getint('Boto', 'http_socket_timeout')
            self.http_connection_kwargs['timeout'] = socket_timeout

    self.provider = Provider(provider, aws_access_key_id,
                             aws_secret_access_key)

    # Allow the config file to override the default host.
    if self.provider.host:
        self.host = self.provider.host

    # Cache up to 20 connections per host, up to 20 hosts.
    self._pool = ConnectionPool(20, 20)
    self._connection = (self.server_name(), self.is_secure)
    self._last_rs = None
    # Note: the handler is looked up with the host argument as passed in,
    # not with any provider override applied above.
    required_capability = self._required_auth_capability()
    self._auth_handler = auth.get_auth_handler(
        host, config, self.provider, required_capability)
def ResumableThreshold():
    """Returns the 'resumable_threshold' option of the 'GSUtil' config section.

    The option is read as an integer, with 8 * ONE_MIB passed to the config
    lookup as the fallback value.
    """
    fallback_threshold = 8 * ONE_MIB
    return config.getint('GSUtil', 'resumable_threshold', fallback_threshold)
def _mexe(self, method, path, data, headers, host=None, sender=None,
          override_num_retries=None):
    """
    mexe - Multi-execute inside a loop, retrying multiple times to handle
           transient Internet errors by simply trying again.
           Also handles redirects.

    This code was inspired by the S3Utils classes posted to the boto-users
    Google group by Larry Bates.  Thanks!

    :param method: HTTP method name; 'HEAD' gets special handling for
        chunked responses.
    :param path: Request path; replaced by the redirect target's path
        (plus query string) when a redirect is followed.
    :param data: Request body handed to the connection or to ``sender``.
    :param headers: Request headers handed to the connection or to
        ``sender``.
    :param host: Host used to obtain connections; replaced by the redirect
        target's host when a redirect is followed.
    :param sender: Optional callable invoked as
        ``sender(connection, method, path, data, headers)`` to perform the
        request and return the response, instead of the default
        ``connection.request`` / ``connection.getresponse`` pair.
    :param override_num_retries: When not None, used instead of the
        ``num_retries`` config option (whose fallback is
        ``self.num_retries``). Up to ``num_retries + 1`` attempts are made.

    :return: The response, as soon as one arrives whose status is below
        300, or is 400 or above (other than 408, 500 and 503), or which
        carries no ``location`` header.

    :raises BotoServerError: Attempts were exhausted and a response had
        been received.
    :raises BotoClientError: Attempts were exhausted with neither a
        response nor a caught exception.

    If attempts are exhausted with no response but with a caught exception
    from ``self.http_exceptions``, that exception is re-raised.
    """
    boto.log.debug('Method: %s' % method)
    boto.log.debug('Path: %s' % path)
    boto.log.debug('Data: %s' % data)
    boto.log.debug('Headers: %s' % headers)
    boto.log.debug('Host: %s' % host)
    response = None
    body = None
    # Kept in its own name rather than reusing the ``except ... as`` target:
    # Python 3 unbinds that target when the handler exits, which would make
    # the final re-raise fail with UnboundLocalError.
    last_error = None
    if override_num_retries is None:
        num_retries = config.getint('Boto', 'num_retries', self.num_retries)
    else:
        num_retries = override_num_retries
    i = 0
    connection = self.get_http_connection(host, self.is_secure)
    while i <= num_retries:
        try:
            if hasattr(sender, '__call__'):
                response = sender(connection, method, path, data, headers)
            else:
                connection.request(method, path, data, headers)
                response = connection.getresponse()
            location = response.getheader('location')
            # -- gross hack --
            # httplib gets confused with chunked responses to HEAD requests
            # so I have to fake it out
            if method == 'HEAD' and getattr(response, 'chunked', False):
                response.chunked = 0
            if response.status == 500 or response.status == 503:
                boto.log.debug('received %d response, retrying in %d seconds'
                               % (response.status, 2 ** i))
                body = response.read()
            elif response.status == 408:
                body = response.read()
                print('-------------------------')
                print('         4 0 8           ')
                print('path=%s' % path)
                print(body)
                print('-------------------------')
            elif response.status < 300 or response.status >= 400 or \
                    not location:
                self.put_http_connection(host, self.is_secure, connection)
                return response
            else:
                scheme, host, path, params, query, fragment = \
                    urlparse.urlparse(location)
                if query:
                    path += '?' + query
                boto.log.debug('Redirecting: %s' % scheme + '://' + host
                               + path)
                connection = self.get_http_connection(host,
                                                      scheme == 'https')
                # Following a redirect does not consume a retry: the
                # counter increment below is skipped.
                continue
        except KeyboardInterrupt:
            sys.exit('Keyboard Interrupt')
        except self.http_exceptions as e:
            last_error = e
            boto.log.debug('encountered %s exception, reconnecting' %
                           e.__class__.__name__)
            connection = self.new_http_connection(host, self.is_secure)
        # Back off only when another attempt will follow; sleeping after
        # the final attempt would just delay the error.
        if i < num_retries:
            time.sleep(2 ** i)
        i += 1
    # If we made it here, it's because we have exhausted our retries and
    # still haven't succeeded.  So, if we have a response object, use it to
    # raise an exception. Otherwise, raise the exception that must have
    # already happened.
    if response:
        raise BotoServerError(response.status, response.reason, body)
    elif last_error is not None:
        raise last_error
    else:
        raise BotoClientError('Please report this exception as a Boto Issue!')