Example #1
0
    def get_user_data(timeout=None, url=None, num_retries=None, data=None):
        """
        Return the user data of the instance.

        Every setting left as ``None`` is read from the ``Boto`` section of
        the config before the request is delegated to ``get_userdata``.

        :type timeout: int
        :param timeout: request timeout; when omitted the
            ``http_socket_timeout`` option is used (70 if unset)

        :type url: string
        :param url: metadata service URL; when omitted the
            ``metadata_service_url`` option is used
            (``http://169.254.169.254`` if unset)

        :type num_retries: int
        :param num_retries: retry count; when omitted the ``num_retries``
            option is used (5 if unset)

        :type data: string
        :param data: canned user data for testing; when given it is returned
            unchanged and nothing else is consulted

        :return: ``data`` when it was supplied, otherwise whatever
            ``get_userdata`` returns
        """
        # Test hook: canned data wins over any lookup.
        if data is not None:
            return data

        request_timeout = (
            config.getint('Boto', 'http_socket_timeout', 70)
            if timeout is None else timeout)
        retry_count = (
            config.getint('Boto', 'num_retries', 5)
            if num_retries is None else num_retries)
        service_url = (
            config.get('Boto', 'metadata_service_url', 'http://169.254.169.254')
            if url is None else url)

        return get_userdata(
            timeout=request_timeout, url=service_url, num_retries=retry_count)
Example #2
0
def GetNewHttp(http_class=httplib2.Http, **kwargs):
    """Instantiates an Http object set up from the boto configuration.

    Args:
      http_class: Class to instantiate; httplib2.Http unless overridden.
      **kwargs: Keyword arguments forwarded to the http_class constructor. The
          'ca_certs' and 'timeout' entries are always overwritten here.

    Returns:
      The newly constructed http_class instance.
    """
    # Gather the proxy options of the [Boto] section; SetProxyInfo turns them
    # into the proxy_info object handed to the constructor.
    proxy_info = SetProxyInfo(dict(
        proxy_host=config.get('Boto', 'proxy', None),
        proxy_type=config.get('Boto', 'proxy_type', 'http'),
        proxy_port=config.getint('Boto', 'proxy_port'),
        proxy_user=config.get('Boto', 'proxy_user', None),
        proxy_pass=config.get('Boto', 'proxy_pass', None),
        proxy_rdns=config.get('Boto', 'proxy_rdns', None),
    ))

    kwargs.update(
        # httplib2 is not always packaged with a certs file, so the one
        # bundled with gsutil is used.
        ca_certs=GetCertsFile(),
        # A finite SSL timeout keeps flaky networks from hanging the process.
        timeout=SSL_TIMEOUT_SEC,
    )

    http = http_class(proxy_info=proxy_info, **kwargs)
    validate_certs = config.getbool('Boto', 'https_validate_certificates')
    http.disable_ssl_certificate_validation = not validate_certs
    return http
Example #3
0
 def _populate_keys_from_metadata_server(self):
     """Load credentials from the instance metadata service, if it answers.

     Reads the IAM security credentials exposed under
     ``meta-data/iam/security-credentials/`` and stores the access key,
     secret key, security token and expiry time on this object. Nothing is
     changed when no metadata comes back.
     """
     boto.log.debug("Retrieving credentials from metadata server.")
     # Imported at call time because of a circular dependency.
     from boto.utils import get_instance_metadata
     request_timeout = config.getfloat('Boto', 'metadata_service_timeout', 1.0)
     # num_retries really means "total attempts", which is why the config
     # option is called *_num_attempts.
     total_attempts = config.getint(
         'Boto', 'metadata_service_num_attempts', 1)
     metadata = get_instance_metadata(
         timeout=request_timeout,
         num_retries=total_attempts,
         data='meta-data/iam/security-credentials/')
     if not metadata:
         return

     # Only the first role is used; the instance profile is assumed to carry
     # exactly one.
     role_credentials = metadata.values()[0]
     self._access_key = role_credentials['AccessKeyId']
     raw_secret = role_credentials['SecretAccessKey']
     self._secret_key = self._convert_key_to_str(raw_secret)
     self._security_token = role_credentials['Token']
     expiration = role_credentials['Expiration']
     self._credential_expiry_time = datetime.strptime(
         expiration, "%Y-%m-%dT%H:%M:%SZ")
     remaining = self._credential_expiry_time - datetime.now()
     boto.log.debug("Retrieved credentials will expire in %s at: %s",
                    remaining, expiration)
Example #4
0
 def __init__(self, name, access_key=None, secret_key=None,
              security_token=None):
     """Initialise the provider called ``name``.

     Stores the supplied credentials, looks up the ACL classes registered for
     the provider, resolves credentials and configures headers and errors.
     Finally the ``Credentials`` config section may override host, port and
     host header.
     """
     self.host = self.port = self.host_header = None
     self.access_key = access_key
     self.secret_key = secret_key
     self.security_token = security_token
     self.name = name
     self.acl_class = self.AclClassMap[self.name]
     self.canned_acls = self.CannedAclsMap[self.name]
     self._credential_expiry_time = None
     self.get_credentials(access_key, secret_key)
     self.configure_headers()
     self.configure_errors()

     # Config file overrides: (attribute, option suffix, reader).
     overrides = (
         ('host', 'host', config.get),
         ('port', 'port', config.getint),
         ('host_header', 'host_header', config.get),
     )
     for attribute, suffix, read_option in overrides:
         option_name = '%s_%s' % (self.HostKeyMap[self.name], suffix)
         if config.has_option('Credentials', option_name):
             setattr(self, attribute,
                     read_option('Credentials', option_name))
Example #5
0
 def __init__(self, name, access_key=None, secret_key=None,
              security_token=None, profile_name=None):
     """Initialise the provider called ``name``.

     Records the supplied credentials and profile name, picks the ACL classes
     registered for the provider, resolves credentials and configures headers
     and errors. Host, port and host header start as ``None`` and are filled
     in from the ``Credentials`` config section when the matching options
     exist.
     """
     self.host = None
     self.port = None
     self.host_header = None
     self.access_key, self.secret_key = access_key, secret_key
     self.security_token = security_token
     self.profile_name = profile_name
     self.name = name
     self.acl_class = self.AclClassMap[self.name]
     self.canned_acls = self.CannedAclsMap[self.name]
     self._credential_expiry_time = None
     self.get_credentials(
         access_key, secret_key, security_token, profile_name)
     self.configure_headers()
     self.configure_errors()

     # Options are named "<provider key>_host", "<provider key>_port" and
     # "<provider key>_host_header" in the Credentials section.
     section = 'Credentials'
     option_prefix = self.HostKeyMap[self.name]

     host_option = '%s_host' % option_prefix
     if config.has_option(section, host_option):
         self.host = config.get(section, host_option)

     port_option = '%s_port' % option_prefix
     if config.has_option(section, port_option):
         self.port = config.getint(section, port_option)

     header_option = '%s_host_header' % option_prefix
     if config.has_option(section, header_option):
         self.host_header = config.get(section, header_option)
Example #6
0
def GetJsonResumableChunkSize():
  """Returns the chunk size to use for JSON resumable transfers.

  Reads json_resumable_chunk_size from the GSUtil config section (100 MiB when
  unset). A configured value of 0 is replaced by 256 KiB; any other value that
  is not a multiple of 256 KiB is rounded up to the next multiple.

  Returns:
    The chunk size in bytes, a multiple of 256 KiB.
  """
  granularity = long(1024 * 256)
  chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
                             long(1024 * 1024 * 100))
  if chunk_size == 0:
    return granularity
  # The modulus must be taken against the whole 256 KiB granularity;
  # "chunk_size % 1024 * 256" would bind as "(chunk_size % 1024) * 256".
  remainder = chunk_size % granularity
  if remainder != 0:
    chunk_size += granularity - remainder
  return chunk_size
Example #7
0
    def _mexe(self, method, path, data, headers, host=None, sender=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!

        The request is issued either through ``sender`` (when it is callable,
        it is called as ``sender(connection, method, path, data, headers)``)
        or directly on the connection. Responses with status 500, 503 or 408
        and exceptions listed in ``self.http_exceptions`` lead to another
        attempt after sleeping ``2 ** i`` seconds, up to ``num_retries``
        additional attempts (the "Boto"/"num_retries" config option, falling
        back to ``self.num_retries``).

        Returns the response object for any status below 300, any status of
        400 or above (other than 408/500/503), or any response without a
        ``location`` header.

        NOTE(review): nothing follows the loop in this block, so when every
        attempt is used up the method falls through and returns None; confirm
        whether a final raise is expected here.
        """
        boto.log.debug("Method: %s" % method)
        boto.log.debug("Path: %s" % path)
        boto.log.debug("Data: %s" % data)
        boto.log.debug("Headers: %s" % headers)
        boto.log.debug("Host: %s" % host)
        response = None
        body = None
        e = None
        num_retries = config.getint("Boto", "num_retries", self.num_retries)
        # i counts completed attempts and is also the backoff exponent.
        i = 0
        connection = self.get_http_connection(host, self.is_secure)
        while i <= num_retries:
            try:
                if callable(sender):
                    response = sender(connection, method, path, data, headers)
                else:
                    connection.request(method, path, data, headers)
                    response = connection.getresponse()
                location = response.getheader("location")
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if method == "HEAD" and getattr(response, "chunked", False):
                    response.chunked = 0
                if response.status == 500 or response.status == 503:
                    # Server-side error: read the body, then fall through to
                    # the sleep/increment at the bottom of the loop.
                    boto.log.debug("received %d response, retrying in %d seconds" % (response.status, 2 ** i))
                    body = response.read()
                elif response.status == 408:
                    # Request timeout: dump diagnostics to stdout, then retry
                    # like the 500/503 case.
                    body = response.read()
                    print "-------------------------"
                    print "         4 0 8           "
                    print "path=%s" % path
                    print body
                    print "-------------------------"
                elif response.status < 300 or response.status >= 400 or not location:
                    # Final answer (success, client error, or a 3xx without a
                    # location): hand the connection back and return.
                    self.put_http_connection(host, self.is_secure, connection)
                    return response
                else:
                    # 3xx with a location header: follow the redirect. host and
                    # path are rebound to the redirect target.
                    scheme, host, path, params, query, fragment = urlparse.urlparse(location)
                    if query:
                        path += "?" + query
                    boto.log.debug("Redirecting: %s" % scheme + "://" + host + path)
                    connection = self.get_http_connection(host, scheme == "https")
                    # NOTE(review): continue skips both the sleep and i += 1,
                    # so redirects do not count against num_retries.
                    continue
            except KeyboardInterrupt:
                sys.exit("Keyboard Interrupt")
            except self.http_exceptions, e:
                # Transport-level failure: start over on a fresh connection.
                boto.log.debug("encountered %s exception, reconnecting" % e.__class__.__name__)
                connection = self.new_http_connection(host, self.is_secure)
            # Exponential backoff before the next attempt.
            time.sleep(2 ** i)
            i += 1
Example #8
0
    def __init__(self, name, access_key=None, secret_key=None,
                 security_token=None, profile_name=None):
        """Initialise the provider called ``name``.

        Stores the supplied credentials and profile name, loads the shared
        credentials file ``~/.<name>/credentials`` when it exists, resolves
        credentials and configures headers and errors. The ``Credentials``
        config section may then override host, port and host header.
        """
        self.host = self.port = self.host_header = None
        self.access_key = access_key
        self.secret_key = secret_key
        self.security_token = security_token
        self.profile_name = profile_name
        self.name = name
        self.acl_class = self.AclClassMap[self.name]
        self.canned_acls = self.CannedAclsMap[self.name]
        self._credential_expiry_time = None

        # The shared credentials Config stays empty unless the file is there.
        credentials_file = os.path.join(
            expanduser('~'), '.' + name, 'credentials')
        self.shared_credentials = Config(do_load=False)
        if os.path.isfile(credentials_file):
            self.shared_credentials.load_from_path(credentials_file)

        self.get_credentials(
            access_key, secret_key, security_token, profile_name)
        self.configure_headers()
        self.configure_errors()

        # Config file overrides: (attribute, option suffix, reader).
        overrides = (
            ('host', 'host', config.get),
            ('port', 'port', config.getint),
            ('host_header', 'host_header', config.get),
        )
        for attribute, suffix, read_option in overrides:
            option_name = '%s_%s' % (self.HostKeyMap[self.name], suffix)
            if config.has_option('Credentials', option_name):
                setattr(self, attribute,
                        read_option('Credentials', option_name))
Example #9
0
    def __init__(self, name, access_key=None, secret_key=None,
                 security_token=None, profile_name=None):
        """Set up provider ``name`` with optional explicit credentials.

        Host, port and host header start as ``None``. After the shared
        credentials file (``~/.<name>/credentials``) has been loaded, if
        present, and credentials, headers and errors are configured, each of
        the three is replaced by the matching ``Credentials`` config option
        when that option exists.
        """
        self.host = None
        self.port = None
        self.host_header = None
        self.access_key, self.secret_key = access_key, secret_key
        self.security_token = security_token
        self.profile_name = profile_name
        self.name = name
        self.acl_class = self.AclClassMap[self.name]
        self.canned_acls = self.CannedAclsMap[self.name]
        self._credential_expiry_time = None

        # Optional per-user credentials file for this provider.
        home_dir = expanduser('~')
        shared_file = os.path.join(home_dir, '.' + name, 'credentials')
        self.shared_credentials = Config(do_load=False)
        if os.path.isfile(shared_file):
            self.shared_credentials.load_from_path(shared_file)

        self.get_credentials(
            access_key, secret_key, security_token, profile_name)
        self.configure_headers()
        self.configure_errors()

        # Options are named "<provider key>_host", "<provider key>_port" and
        # "<provider key>_host_header" in the Credentials section.
        section = 'Credentials'
        option_prefix = self.HostKeyMap[self.name]

        host_option = '%s_host' % option_prefix
        if config.has_option(section, host_option):
            self.host = config.get(section, host_option)

        port_option = '%s_port' % option_prefix
        if config.has_option(section, port_option):
            self.port = config.getint(section, port_option)

        header_option = '%s_host_header' % option_prefix
        if config.has_option(section, header_option):
            self.host_header = config.get(section, header_option)
Example #10
0
File: provider.py Project: 10sr/hue
 def _populate_keys_from_metadata_server(self):
     """Load credentials from the instance metadata service, if it answers.

     Queries ``meta-data/iam/security-credentials/`` and, when metadata is
     returned, stores the access key, secret key, security token and expiry
     time extracted by ``_get_credentials_from_metadata``.
     """
     boto.log.debug("Retrieving credentials from metadata server.")
     # Imported at call time because of a circular dependency.
     from boto.utils import get_instance_metadata
     request_timeout = config.getfloat('Boto', 'metadata_service_timeout', 1.0)
     # num_retries really means "total attempts", which is why the config
     # option is called *_num_attempts.
     total_attempts = config.getint(
         'Boto', 'metadata_service_num_attempts', 1)
     metadata = get_instance_metadata(
         timeout=request_timeout,
         num_retries=total_attempts,
         data='meta-data/iam/security-credentials/')
     if not metadata:
         return

     # Positional layout: access key, secret key, token, expiration string.
     extracted = self._get_credentials_from_metadata(metadata)
     self._access_key = extracted[0]
     self._secret_key = extracted[1]
     self._security_token = extracted[2]
     expiration = extracted[3]
     self._credential_expiry_time = datetime.strptime(
         expiration, "%Y-%m-%dT%H:%M:%SZ")
     remaining = self._credential_expiry_time - datetime.now()
     boto.log.debug("Retrieved credentials will expire in %s at: %s",
                    remaining, expiration)
Example #11
0
    def _populate_keys_from_metadata_server(self):
        """Load credentials from the instance metadata service, if it answers.

        Reads the IAM security credentials exposed under
        ``meta-data/iam/security-credentials/`` and stores the access key,
        secret key, security token and expiry time on this object. Nothing is
        changed when no metadata comes back.
        """
        boto.log.debug("Retrieving credentials from metadata server.")
        # Imported at call time because of a circular dependency.
        from boto.utils import get_instance_metadata

        request_timeout = config.getfloat(
            "Boto", "metadata_service_timeout", 1.0)
        # num_retries really means "total attempts", which is why the config
        # option is called *_num_attempts.
        total_attempts = config.getint(
            "Boto", "metadata_service_num_attempts", 1)
        metadata = get_instance_metadata(
            timeout=request_timeout,
            num_retries=total_attempts,
            data="meta-data/iam/security-credentials/",
        )
        if not metadata:
            return

        # Only the first role is used; the instance profile is assumed to
        # carry exactly one.
        role_credentials = metadata.values()[0]
        self._access_key = role_credentials["AccessKeyId"]
        raw_secret = role_credentials["SecretAccessKey"]
        self._secret_key = self._convert_key_to_str(raw_secret)
        self._security_token = role_credentials["Token"]
        expiration = role_credentials["Expiration"]
        self._credential_expiry_time = datetime.strptime(
            expiration, "%Y-%m-%dT%H:%M:%SZ")
        remaining = self._credential_expiry_time - datetime.now()
        boto.log.debug(
            "Retrieved credentials will expire in %s at: %s",
            remaining,
            expiration,
        )
Example #12
0
def GetJsonResumableChunkSize():
  """Returns the chunk size to use for JSON resumable transfers.

  The json_resumable_chunk_size option of the GSUtil config section is used
  (100 MiB when unset). Zero is replaced by 256 KiB, and any value that is not
  a multiple of 256 KiB is rounded up to the next multiple.

  Returns:
    The chunk size in bytes.
  """
  unit = long(1024 * 256)
  configured = config.getint('GSUtil', 'json_resumable_chunk_size',
                             long(1024 * 1024 * 100))
  if configured == 0:
    return unit
  leftover = configured % unit
  if leftover == 0:
    return configured
  # Pad up to the next whole unit.
  return configured + (unit - leftover)
Example #13
0
def _BatchSort(in_iter, out_file):
    """Sorts input lines from in_iter and outputs to out_file.

    Sorts in batches as input arrives, so input file does not need to be loaded
    into memory all at once. Derived from Python Recipe 466302: Sorting big
    files the Python 2.4 way by Nicolas Lehuen.

    Sorted format is per _BuildTmpOutputLine. We're sorting on the entire line
    when we could just sort on the first record (URL); but the sort order is
    identical either way.

    Each batch of up to rsync_buffer_lines lines (GSUtil config section,
    default 32000) is sorted and written to its own temporary chunk file named
    "<out_file.name>-NNNNNN"; the chunk files are then merged into out_file
    and removed.

    Args:
      in_iter: Input iterator.
      out_file: Output file.

    Raises:
      CommandException: if an IOError with errno EMFILE (too many open files)
          occurs while building or merging the chunk files.
      IOError: any other I/O error, re-raised unchanged.
    """
    # Note: If chunk_files gets very large we can run out of open FDs. See .boto
    # file comments about rsync_buffer_lines. If increasing rsync_buffer_lines
    # doesn't suffice (e.g., for someone synchronizing with a really large
    # bucket), an option would be to make gsutil merge in passes, never
    # opening all chunk files simultaneously.
    buffer_size = config.getint("GSUtil", "rsync_buffer_lines", 32000)
    chunk_files = []
    try:
        while True:
            current_chunk = sorted(islice(in_iter, buffer_size))
            if not current_chunk:
                break
            output_chunk = io.open("%s-%06i" % (out_file.name, len(chunk_files)), mode="w+", encoding=UTF8)
            chunk_files.append(output_chunk)
            # A single write() emits the whole batch; writelines() on one
            # string would iterate it and write character by character.
            output_chunk.write(unicode("".join(current_chunk)))
            output_chunk.flush()
            # Rewind so the merge below reads the chunk from its start.
            output_chunk.seek(0)
        out_file.writelines(heapq.merge(*chunk_files))
    except IOError as e:
        if e.errno == errno.EMFILE:
            raise CommandException(
                "\n".join(
                    textwrap.wrap(
                        "Synchronization failed because too many open file handles were "
                        "needed while building synchronization state. Please see the "
                        "comments about rsync_buffer_lines in your .boto config file for a "
                        "possible way to address this problem."
                    )
                )
            )
        raise
    finally:
        # Best-effort cleanup. Closing and removing are guarded separately so
        # a failed close() does not leave the temporary file behind, and only
        # Exception is swallowed so KeyboardInterrupt/SystemExit still
        # propagate.
        for chunk_file in chunk_files:
            try:
                chunk_file.close()
            except Exception:
                pass
            try:
                os.remove(chunk_file.name)
            except Exception:
                pass
Example #14
0
def _BatchSort(in_iter, out_file):
    """Sorts input lines from in_iter and outputs to out_file.

    Sorts in batches as input arrives, so input file does not need to be loaded
    into memory all at once. Derived from Python Recipe 466302: Sorting big
    files the Python 2.4 way by Nicolas Lehuen.

    Sorted format is per _BuildTmpOutputLine. We're sorting on the entire line
    when we could just sort on the first record (URL); but the sort order is
    identical either way.

    Each batch of up to rsync_buffer_lines lines (GSUtil config section,
    default 32000) is sorted and written to its own temporary chunk file named
    '<out_file.name>-NNNNNN'; the chunk files are then merged into out_file
    and removed.

    Args:
      in_iter: Input iterator.
      out_file: Output file.

    Raises:
      CommandException: if an IOError with errno EMFILE (too many open files)
          occurs while building or merging the chunk files.
      IOError: any other I/O error, re-raised unchanged.
    """
    # Note: If chunk_files gets very large we can run out of open FDs. See .boto
    # file comments about rsync_buffer_lines. If increasing rsync_buffer_lines
    # doesn't suffice (e.g., for someone synchronizing with a really large
    # bucket), an option would be to make gsutil merge in passes, never
    # opening all chunk files simultaneously.
    buffer_size = config.getint('GSUtil', 'rsync_buffer_lines', 32000)
    chunk_files = []
    try:
        while True:
            current_chunk = sorted(islice(in_iter, buffer_size))
            if not current_chunk:
                break
            output_chunk = io.open('%s-%06i' %
                                   (out_file.name, len(chunk_files)),
                                   mode='w+',
                                   encoding=UTF8)
            chunk_files.append(output_chunk)
            # A single write() emits the whole batch; writelines() on one
            # string would iterate it and write character by character.
            output_chunk.write(unicode(''.join(current_chunk)))
            output_chunk.flush()
            # Rewind so the merge below reads the chunk from its start.
            output_chunk.seek(0)
        out_file.writelines(heapq.merge(*chunk_files))
    except IOError as e:
        if e.errno == errno.EMFILE:
            raise CommandException('\n'.join(
                textwrap.wrap(
                    'Synchronization failed because too many open file handles were '
                    'needed while building synchronization state. Please see the '
                    'comments about rsync_buffer_lines in your .boto config file for a '
                    'possible way to address this problem.')))
        raise
    finally:
        # Best-effort cleanup. Closing and removing are guarded separately so
        # a failed close() does not leave the temporary file behind, and only
        # Exception is swallowed so KeyboardInterrupt/SystemExit still
        # propagate.
        for chunk_file in chunk_files:
            try:
                chunk_file.close()
            except Exception:
                pass
            try:
                os.remove(chunk_file.name)
            except Exception:
                pass
Example #15
0
def SetProxyInfo():
    """Builds an httplib2.ProxyInfo from the boto config or the environment.

    The proxy settings of the [Boto] config section are used first. When they
    do not yield both a host and a port, the first non-empty proxy environment
    variable is used instead.

    Args:
      None.

    Returns:
      httplib2.ProxyInfo constructed from boto or environment variable string.
    """
    # Numeric proxy types as used by httplib2.
    type_codes = {'socks4': 1, 'socks5': 2, 'http': 3, 'https': 3}
    http_code = type_codes['http']

    # An unset or unrecognised proxy_type falls back to 'http' (3) for
    # backwards compatibility.
    configured_type = config.get('Boto', 'proxy_type', None)
    proxy_type = type_codes.get(configured_type) or http_code
    uses_http_proxy = proxy_type == http_code

    host = config.get('Boto', 'proxy', None)
    port = config.getint('Boto', 'proxy_port', 0)
    user = config.get('Boto', 'proxy_user', None)
    password = config.get('Boto', 'proxy_pass', None)
    # proxy_rdns fails for socks4 and socks5, so it only defaults to on for
    # http proxies.
    rdns = config.getbool('Boto', 'proxy_rdns', uses_http_proxy)

    proxy_info = httplib2.ProxyInfo(
        proxy_host=host,
        proxy_type=proxy_type,
        proxy_port=port,
        proxy_user=user,
        proxy_pass=password,
        proxy_rdns=rdns)

    # Socks proxies never use rdns, whatever the config says.
    if proxy_info.proxy_type != http_code:
        proxy_info.proxy_rdns = False

    if proxy_info.proxy_host and proxy_info.proxy_port:
        return proxy_info

    # Fall back to the environment. Only http proxies are used.
    for env_name in ('http_proxy', 'https_proxy', 'HTTPS_PROXY'):
        if os.environ.get(env_name):
            proxy_info = ProxyInfoFromEnvironmentVar(env_name)
            # proxy_rdns is assumed on when a proxy environment variable
            # exists.
            proxy_info.proxy_rdns = config.getbool('Boto', 'proxy_rdns', True)
            break

    return proxy_info
Example #16
0
def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Proxy settings come from the [Boto] config section; when they do not yield
  both a host and a port, the first non-empty proxy environment variable among
  http_proxy, https_proxy and HTTPS_PROXY is used instead.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor. The 'ca_certs' and
        'timeout' entries are always overwritten here.

  Returns:
    An initialized httplib2.Http instance.
  """
  proxy_host = config.get('Boto', 'proxy', None)
  proxy_info = httplib2.ProxyInfo(
      # 3 is the proxy type used for HTTP proxies.
      proxy_type=3,
      proxy_host=proxy_host,
      proxy_port=config.getint('Boto', 'proxy_port', 0),
      proxy_user=config.get('Boto', 'proxy_user', None),
      proxy_pass=config.get('Boto', 'proxy_pass', None),
      # getbool, not get: get returns the raw option string, so a configured
      # "False" would be truthy and rdns could never be switched off.
      proxy_rdns=config.getbool('Boto', 'proxy_rdns', bool(proxy_host)))

  if not (proxy_info.proxy_host and proxy_info.proxy_port):
    # Fall back to using the environment variable.
    for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
      if proxy_env_var in os.environ and os.environ[proxy_env_var]:
        proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
        # Assume proxy_rdns is True if a proxy environment variable exists.
        proxy_info.proxy_rdns = config.getbool('Boto', 'proxy_rdns', True)
        break

  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT_SEC
  http = http_class(proxy_info=proxy_info, **kwargs)
  http.disable_ssl_certificate_validation = (not config.getbool(
      'Boto', 'https_validate_certificates'))
  return http
Example #17
0
    def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, path='/', provider='aws'):
        """
        Prepare a connection to ``host``.

        :type host: str
        :param host: The host to make the connection to. Replaced by the
            provider's host when the provider defines one.

        :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
            Amazon). If none is specified, the value in your
            ``AWS_ACCESS_KEY_ID`` environmental variable is used.
        :keyword str aws_secret_access_key: Your AWS Secret Access Key
            (provided by Amazon). If none is specified, the value in your
            ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL. An ``is_secure``
            option in the ``Boto`` config section overrides this argument.

        :type port: int
        :param port: The port to use to connect. When falsy, the port is
            looked up in ``PORTS_BY_SECURITY`` by ``is_secure``.

        :param str proxy: Address/hostname for a proxy server

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: str
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: str
        :param proxy_pass: The password to use when connecting over a proxy

        :type debug: int
        :param debug: Debug level. When falsy, the ``debug`` option of the
            ``Boto`` config section is used.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
            factory and the exceptions to catch. The factory should have a
            similar interface to L{httplib.HTTPSConnection}.

        :type path: str
        :param path: Stored as ``self.path``.

        :type provider: str
        :param provider: Name passed to ``Provider`` together with the keys.
        """
        self.num_retries = 5

        # A value in the config file takes precedence over the argument.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

        # httplib/socket exceptions that are caught and retried.
        self.http_exceptions = (httplib.HTTPException, socket.error,
                                socket.gaierror)
        # Values in socket exceptions that must not be caught.
        self.socket_exception_values = (errno.EINTR,)
        if https_connection_factory is None:
            self.https_connection_factory = None
        else:
            # Element 0 is the factory, element 1 the extra exceptions.
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]

        self.protocol = 'https' if is_secure else 'http'
        self.host = host
        self.path = path
        self.debug = debug if debug else config.getint('Boto', 'debug', debug)
        self.port = port if port else PORTS_BY_SECURITY[is_secure]

        self.provider = Provider(
            provider, aws_access_key_id, aws_secret_access_key)

        # The config file (via the provider) may override the default host.
        provider_host = self.provider.host
        if provider_host:
            self.host = provider_host

        # Cache up to 20 connections per host, up to 20 hosts.
        self._pool = ConnectionPool(20, 20)
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
        required_capability = self._required_auth_capability()
        self._auth_handler = auth.get_auth_handler(
            host, config, self.provider, required_capability)
    def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
                 version_id=None, hash_algs=None):
        """
        Retrieves a file from a Key, retrying for as long as progress is made.

        Each attempt calls ``_attempt_resumable_download``. After a retryable
        failure the size of ``fp`` is compared with its size before the
        attempt; attempts that add no bytes are counted, and once more than
        ``self.num_retries`` consecutive attempts made no progress the
        download is abandoned for the current process. Between attempts the
        method sleeps ``2 ** <progress-less attempts in a row>`` seconds.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object from which upload is to be downloaded

        :type fp: file
        :param fp: File pointer into which data should be downloaded

        :type headers: string
        :param headers: headers to send when retrieving the files; a falsy
            value is replaced by an empty dict

        :type cb: function
        :param cb: (optional) a callback function that will be called to report
             progress on the download.  The callback should accept two integer
             parameters, the first representing the number of bytes that have
             been successfully transmitted from the storage service and
             the second representing the total number of bytes that need
             to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
             parameter this parameter determines the granularity of the callback
             by defining the maximum number of times the callback will be
             called during the file transfer.

        :type torrent: bool
        :param torrent: Flag for whether to get a torrent for the file

        :type version_id: string
        :param version_id: The version ID (optional)

        :type hash_algs: dictionary
        :param hash_algs: (optional) Dictionary of hash algorithms and
            corresponding hashing class that implements update() and digest().
            Defaults to {'md5': hashlib/md5.md5}.

        Raises ResumableDownloadException if a problem occurs during
            the transfer.
        """

        # Verbosity is taken from the connection that owns the key's bucket.
        debug = key.bucket.connection.debug
        if not headers:
            headers = {}

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 6.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 6)
        # Consecutive attempts that did not grow the file.
        progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            had_file_bytes_before_attempt = get_cur_file_size(fp)
            try:
                self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                                 torrent, version_id, hash_algs)
                # Download succceded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                # Previously, check_final_md5() was called here to validate
                # downloaded file's checksum, however, to be consistent with
                # non-resumable downloads, this call was removed. Checksum
                # validation of file contents should be done by the caller.
                if debug >= 1:
                    print('Resumable download complete.')
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # Broken pipe error causes httplib to immediately
                    # close the socket (http://bugs.python.org/issue5542),
                    # so we need to close and reopen the key before resuming
                    # the download.
                    # Only the GSKey call is passed hash_algs.
                    if isinstance(key, GSKey):
                      key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                                   override_num_retries=0, hash_algs=hash_algs)
                    else:
                      key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                                   override_num_retries=0)
            except ResumableDownloadException as e:
                # The disposition carried by the exception decides whether to
                # give up (optionally dropping the tracker file) or to retry.
                if (e.disposition ==
                    ResumableTransferDisposition.ABORT_CUR_PROCESS):
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s)' % e.message)
                    raise
                elif (e.disposition ==
                    ResumableTransferDisposition.ABORT):
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s); aborting and removing tracker file' %
                              e.message)
                    self._remove_tracker_file()
                    raise
                else:
                    if debug >= 1:
                        print('Caught ResumableDownloadException (%s) - will '
                              'retry' % e.message)

            # At this point we had a re-tryable failure; see if made progress.
            if get_cur_file_size(fp) > had_file_bytes_before_attempt:
                progress_less_iterations = 0
            else:
                progress_less_iterations += 1

            if progress_less_iterations > self.num_retries:
                # Don't retry any longer in the current process.
                raise ResumableDownloadException(
                    'Too many resumable download attempts failed without '
                    'progress. You might try this download again later',
                    ResumableTransferDisposition.ABORT_CUR_PROCESS)

            # Close the key, in case a previous download died partway
            # through and left data in the underlying key HTTP buffer.
            # Do this within a try/except block in case the connection is
            # closed (since key.close() attempts to do a final read, in which
            # case this read attempt would get an IncompleteRead exception,
            # which we can safely ignore.
            try:
                key.close()
            except httplib.IncompleteRead:
                pass

            # Exponential backoff keyed on the number of progress-less
            # attempts in a row.
            sleep_time_secs = 2**progress_less_iterations
            if debug >= 1:
                print('Got retryable failure (%d progress-less in a row).\n'
                      'Sleeping %d seconds before re-trying' %
                      (progress_less_iterations, sleep_time_secs))
            time.sleep(sleep_time_secs)
Example #19
0
    def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, path='/', provider=None):
        """
        Store the connection settings, resolve the credentials and build the
        HMAC objects used for signing.

        :type host: string
        :param host: The host to make the connection to

        :type aws_access_key_id: string
        :param aws_access_key_id: AWS Access Key ID (provided by Amazon)

        :type aws_secret_access_key: string
        :param aws_secret_access_key: Secret Access Key (provided by Amazon)

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL. An
                          ``is_secure`` option in the ``Boto`` config
                          section overrides this argument.

        :type port: integer
        :param port: The port to use to connect. When falsy, the port is
                     looked up in ``PORTS_BY_SECURITY`` by ``is_secure``.

        :type proxy:
        :param proxy: Forwarded unchanged to ``handle_proxy``.

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: string
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: string
        :param proxy_pass: The password to use when connection over a proxy.

        :type debug: int
        :param debug: Debug level. When falsy, the ``debug`` option of the
                      ``Boto`` config section is used instead.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.

        :type path: string
        :param path: Stored as ``self.path``.

        :type provider: string
        :param provider: Optional provider name. ``"google"`` and
                         ``"amazon"`` select provider-specific credential
                         and host options from the ``Credentials`` config
                         section.
        """

        self.num_retries = 5
        # Override passed-in is_secure setting if value was defined in config.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from httplib that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error, socket.gaierror)
        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR,)
        if https_connection_factory is not None:
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]
        else:
            self.https_connection_factory = None
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.host = host
        self.path = path
        if debug:
            self.debug = debug
        else:
            self.debug = config.getint('Boto', 'debug', debug)
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]

        # If credentials have been loaded with provider-dependent ids and
        # secret keys, use them.  Note that these config values replace any
        # credentials that were passed in as arguments.
        if provider:
            if provider == "google":
                if (config.has_option('Credentials',
                                      'gs_access_key_id') and
                    config.has_option('Credentials',
                                      'gs_secret_access_key')):
                    aws_access_key_id = config.get(
                        'Credentials', 'gs_access_key_id')
                    aws_secret_access_key = config.get(
                        'Credentials',
                        'gs_secret_access_key')
                # allow config file to override default host
                if (config.has_option('Credentials',
                                      'gs_host')):
                    self.host = config.get('Credentials',
                                           'gs_host')
            elif provider == "amazon":
                if (config.has_option('Credentials',
                                      'aws_access_key_id') and
                    config.has_option('Credentials',
                                      'aws_secret_access_key')):
                    aws_access_key_id = config.get('Credentials',
                                                   'aws_access_key_id')
                    aws_secret_access_key = config.get('Credentials',
                                                       'aws_secret_access_key')
                # allow config file to override default host
                if (config.has_option('Credentials',
                                      'aws_host')):
                    self.host = config.get('Credentials',
                                           'aws_host')

        # Credential lookup order: explicit value, environment, config file.
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        # NOTE: when no source supplies a value the attribute is left unset,
        # so the hmac.new() call below fails with AttributeError.
        if aws_access_key_id:
            self.aws_access_key_id = aws_access_key_id
        elif 'AWS_ACCESS_KEY_ID' in os.environ:
            self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        elif config.has_option('Credentials', 'aws_access_key_id'):
            self.aws_access_key_id = config.get('Credentials', 'aws_access_key_id')

        if aws_secret_access_key:
            self.aws_secret_access_key = aws_secret_access_key
        elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
            self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
        elif config.has_option('Credentials', 'aws_secret_access_key'):
            self.aws_secret_access_key = config.get('Credentials', 'aws_secret_access_key')

        # initialize an HMAC for signatures, make copies with each request
        self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
        if sha256:
            self.hmac_256 = hmac.new(self.aws_secret_access_key, digestmod=sha256)
        else:
            self.hmac_256 = None

        # cache up to 20 connections per host, up to 20 hosts
        self._pool = ConnectionPool(20, 20)
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
Example #20
0
    def __init__(self,
                 host,
                 aws_access_key_id=None,
                 aws_secret_access_key=None,
                 is_secure=True,
                 port=None,
                 proxy=None,
                 proxy_port=None,
                 proxy_user=None,
                 proxy_pass=None,
                 debug=0,
                 https_connection_factory=None,
                 path='/'):
        """
        Store the connection settings, resolve the credentials and build the
        HMAC objects used for signing.

        :type host: string
        :param host: The host to make the connection to

        :type aws_access_key_id: string
        :param aws_access_key_id: AWS Access Key ID (provided by Amazon)

        :type aws_secret_access_key: string
        :param aws_secret_access_key: Secret Access Key (provided by Amazon)

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL

        :type port: integer
        :param port: The port to use to connect. When falsy, the port is
                     looked up in ``PORTS_BY_SECURITY`` by ``is_secure``.

        :type proxy:
        :param proxy: Forwarded unchanged to ``handle_proxy``.

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: string
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: string
        :param proxy_pass: The password to use when connection over a proxy.

        :type debug: int
        :param debug: Debug level. When falsy, the ``debug`` option of the
                      ``Boto`` config section is used instead.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.

        :type path: string
        :param path: Stored as ``self.path``.
        """

        self.num_retries = 5
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from httplib that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error,
                                socket.gaierror)
        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR, )
        if https_connection_factory is not None:
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]
        else:
            self.https_connection_factory = None
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.host = host
        self.path = path
        if debug:
            self.debug = debug
        else:
            self.debug = config.getint('Boto', 'debug', debug)
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]

        # Credential lookup order: explicit value, environment, config file.
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        # NOTE: when no source supplies a value the attribute is left unset,
        # so the hmac.new() call below fails with AttributeError.
        if aws_access_key_id:
            self.aws_access_key_id = aws_access_key_id
        elif 'AWS_ACCESS_KEY_ID' in os.environ:
            self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        elif config.has_option('Credentials', 'aws_access_key_id'):
            self.aws_access_key_id = config.get('Credentials',
                                                'aws_access_key_id')

        if aws_secret_access_key:
            self.aws_secret_access_key = aws_secret_access_key
        elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
            self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
        elif config.has_option('Credentials', 'aws_secret_access_key'):
            self.aws_secret_access_key = config.get('Credentials',
                                                    'aws_secret_access_key')

        # initialize an HMAC for signatures, make copies with each request
        self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
        if sha256:
            self.hmac_256 = hmac.new(self.aws_secret_access_key,
                                     digestmod=sha256)
        else:
            self.hmac_256 = None

        # cache up to 20 connections per host, up to 20 hosts
        self._pool = ConnectionPool(20, 20)
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
Example #21
0
    def __init__(
        self,
        host,
        aws_access_key_id=None,
        aws_secret_access_key=None,
        is_secure=True,
        port=None,
        proxy=None,
        proxy_port=None,
        proxy_user=None,
        proxy_pass=None,
        debug=0,
        https_connection_factory=None,
        path="/",
        provider="aws",
    ):
        """
        Record the connection settings, create the credential provider and
        prepare the HMAC objects used for signing.

        :type host: string
        :param host: The host to make the connection to

        :type aws_access_key_id: string
        :param aws_access_key_id: AWS Access Key ID (provided by Amazon)

        :type aws_secret_access_key: string
        :param aws_secret_access_key: Secret Access Key (provided by Amazon)

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL; replaced by the
                          ``is_secure`` option of the ``Boto`` config section
                          when that option exists.

        :type port: integer
        :param port: The port to use to connect

        :type proxy:
        :param proxy: Handed to ``handle_proxy`` as-is.

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: string
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: string
        :param proxy_pass: The password to use when connection over a proxy.

        :type debug: int
        :param debug: Debug level; a falsy value defers to the config file.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.

        :type path: string
        :param path: Kept on the instance as ``self.path``.

        :type provider: string
        :param provider: Name handed to ``Provider`` together with the two
                         credential arguments.
        """

        self.num_retries = 5

        # A value from the config file wins over the is_secure argument.
        if config.has_option("Boto", "is_secure"):
            is_secure = config.getboolean("Boto", "is_secure")
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

        # httplib / socket errors that are caught and retried
        self.http_exceptions = (httplib.HTTPException, socket.error, socket.gaierror)
        # errno values of socket errors that must not be caught
        self.socket_exception_values = (errno.EINTR,)

        if https_connection_factory is None:
            self.https_connection_factory = None
        else:
            # (factory, extra exceptions to retry on)
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]

        self.protocol = "https" if is_secure else "http"
        self.host = host
        self.path = path
        self.debug = debug if debug else config.getint("Boto", "debug", debug)
        self.port = port if port else PORTS_BY_SECURITY[is_secure]

        self.provider = Provider(provider, aws_access_key_id, aws_secret_access_key)

        # the provider may carry a host that replaces the one passed in
        if self.provider.host:
            self.host = self.provider.host

        if self.secret_key is None:
            raise BotoClientError("No credentials have been supplied")

        # HMAC templates for signing; a copy is made for each request
        self.hmac = hmac.new(self.secret_key, digestmod=sha)
        self.hmac_256 = hmac.new(self.secret_key, digestmod=sha256) if sha256 else None

        # connection pool: at most 20 connections per host and 20 hosts
        self._pool = ConnectionPool(20, 20)
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
Example #22
0
def GetNumRetries():
  """Return the ``num_retries`` option of the ``Boto`` config section.

  The value is read with ``config.getint``, passing 23 as the default.
  """
  num_retries = config.getint('Boto', 'num_retries', 23)
  return num_retries
    def send_file(self, key, fp, headers, cb=None, num_cb=10):
        """
        Upload a file to a key into a bucket on GS, using GS resumable upload
        protocol.

        Failed attempts are retried. The retry counter is reset whenever an
        attempt moved ``self.server_has_bytes`` forward; once more than
        ``self.num_retries`` consecutive attempts made no progress the
        upload is abandoned. Each retry is preceded by an exponentially
        growing sleep.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object to which data is to be uploaded

        :type fp: file-like object
        :param fp: The file pointer to upload. It must be seekable: its
            length is measured by seeking to the end and it is then rewound
            to offset 0.

        :type headers: dict
        :param headers: The headers to pass along with the PUT request

        :type cb: function
        :param cb: a callback function that will be called to report progress on
            the upload.  The callback should accept two integer parameters, the
            first representing the number of bytes that have been successfully
            transmitted to GS, and the second representing the total number of
            bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter, this parameter determines the granularity of the callback
            by defining the maximum number of times the callback will be called
            during the file transfer. Providing a negative integer will cause
            your callback to be called with each buffer read.

        Raises ResumableUploadException if a problem occurs during the transfer.
        """
        # Imported here so the block does not depend on a module-level import.
        import time

        if not headers:
            headers = {}

        fp.seek(0, os.SEEK_END)
        file_length = fp.tell()
        fp.seek(0)
        debug = key.bucket.connection.debug

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 5.
        if self.num_retries is None:
            self.num_retries = config.getint("Boto", "num_retries", 5)
        progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            server_had_bytes_before_attempt = self.server_has_bytes
            try:
                etag = self._attempt_resumable_upload(key, fp, file_length,
                                                      headers, cb, num_cb)
                # Upload succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                self._check_final_md5(key, etag)
                if debug >= 1:
                    print("Resumable upload complete.")
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print("Caught exception (%s)" % e.__repr__())
            except ResumableUploadException as e:
                if e.disposition == ResumableTransferDisposition.ABORT:
                    if debug >= 1:
                        print("Caught non-retryable ResumableUploadException "
                              "(%s)" % e.message)
                    raise
                else:
                    if debug >= 1:
                        print("Caught ResumableUploadException (%s) - will "
                              "retry" % e.message)

            # At this point we had a re-tryable failure; see if made progress.
            if self.server_has_bytes > server_had_bytes_before_attempt:
                progress_less_iterations = 0
            else:
                progress_less_iterations += 1

            if progress_less_iterations > self.num_retries:
                # Don't retry any longer; without this bound a persistent
                # failure would keep the loop spinning forever.
                raise ResumableUploadException(
                    'Too many resumable upload attempts failed without '
                    'progress. You might try this upload again later',
                    ResumableTransferDisposition.ABORT)

            # Exponential backoff before the next attempt.
            sleep_time_secs = 2 ** progress_less_iterations
            if debug >= 1:
                print('Got retryable failure (%d progress-less in a row).\n'
                      'Sleeping %d seconds before re-trying' %
                      (progress_less_iterations, sleep_time_secs))
            time.sleep(sleep_time_secs)
  def SendFile(self, key, fp, size, headers, canned_acl=None, cb=None,
               num_cb=10):
    """Upload a file to a key into a bucket on GS, resumable upload protocol.

    On success the generation and metageneration returned by the upload
    attempt are stored on this object, and the generation is also copied to
    `key.generation`.

    Args:
      key: `boto.s3.key.Key` or subclass representing the upload destination.
      fp: File pointer to upload
      size: Size of the file to upload.
      headers: The headers to pass along with the PUT request. A dict passed
          in is modified in place: a `Content-Type` entry whose value is None
          is removed, and the ACL and `User-Agent` entries are set.
      canned_acl: Optional canned ACL to apply to object.
      cb: Callback function that will be called to report progress on
          the upload.  The callback should accept two integer parameters, the
          first representing the number of bytes that have been successfully
          transmitted to GS, and the second representing the total number of
          bytes that need to be transmitted.
      num_cb: (optional) If a callback is specified with the cb parameter, this
              parameter determines the granularity of the callback by defining
              the maximum number of times the callback will be called during the
              file transfer. Providing a negative integer will cause your
              callback to be called with each buffer read.

    Raises:
      ResumableUploadException if a problem occurs during the transfer.
    """

    if not headers:
      headers = {}
    # If Content-Type header is present and set to None, remove it.
    # This is gsutil's way of asking boto to refrain from auto-generating
    # that header.
    content_type = 'Content-Type'
    if content_type in headers and headers[content_type] is None:
      del headers[content_type]

    if canned_acl:
      headers[key.provider.acl_header] = canned_acl

    headers['User-Agent'] = UserAgent

    file_length = size
    debug = key.bucket.connection.debug

    # Use num-retries from constructor if one was provided; else check
    # for a value specified in the boto config file; else default to 6.
    if self.num_retries is None:
      self.num_retries = config.getint('Boto', 'num_retries', 6)
    self.progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
      # NOTE(review): this value and self.progress_less_iterations are not
      # consulted anywhere in this method; presumably a helper outside this
      # block does the progress tracking and backoff -- confirm.
      service_had_bytes_before_attempt = self.service_has_bytes
      try:
        # Save generation and metageneration in class state so caller
        # can find these values, for use in preconditions of future
        # operations on the uploaded object.
        (_, self.generation, self.metageneration) = (
            self._AttemptResumableUpload(key, fp, file_length,
                                         headers, cb, num_cb))

        key.generation = self.generation
        if debug >= 1:
          self.logger.debug('Resumable upload complete.')
        return
      except self.RETRYABLE_EXCEPTIONS as e:
        if debug >= 1:
          self.logger.debug('Caught exception (%s)', e.__repr__())
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # Broken pipe error causes httplib to immediately
          # close the socket (http://bugs.python.org/issue5542),
          # so we need to close the connection before we resume
          # the upload (which will cause a new connection to be
          # opened the next time an HTTP request is sent).
          key.bucket.connection.connection.close()
      except ResumableUploadException as e:
        self.HandleResumableUploadException(e, debug)
Example #25
0
def GetNumRetries():
  """Return the ``num_retries`` option of the ``Boto`` config section.

  The value is read with ``config.getint``, passing 6 as the default.
  """
  num_retries = config.getint('Boto', 'num_retries', 6)
  return num_retries
Example #26
0
    def get_file(self,
                 key,
                 fp,
                 headers,
                 cb=None,
                 num_cb=10,
                 torrent=False,
                 version_id=None):
        """
        Retrieves a file from a Key, retrying failed attempts.

        The retry counter is reset whenever an attempt made the local file
        grow; once more than ``self.num_retries`` consecutive attempts made
        no progress the download is abandoned for this process. Each retry
        is preceded by an exponentially growing sleep.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object from which upload is to be downloaded

        :type fp: file
        :param fp: File pointer into which data should be downloaded

        :type headers: dict
        :param headers: headers to send when retrieving the files

        :type cb: function
        :param cb: (optional) a callback function that will be called to report
             progress on the download.  The callback should accept two integer
             parameters, the first representing the number of bytes that have
             been successfully transmitted from the storage service and
             the second representing the total number of bytes that need
             to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
             parameter this parameter determines the granularity of the callback
             by defining the maximum number of times the callback will be
             called during the file transfer.

        :type torrent: bool
        :param torrent: Flag for whether to get a torrent for the file

        :type version_id: string
        :param version_id: The version ID (optional)

        Raises ResumableDownloadException if a problem occurs during
            the transfer.
        """
        # Imported here so the block does not depend on module-level imports.
        import time
        try:
            import httplib
        except ImportError:  # Python 3 name of the same module
            import http.client as httplib

        debug = key.bucket.connection.debug
        if not headers:
            headers = {}

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 5.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 5)
        progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            had_file_bytes_before_attempt = get_cur_file_size(fp)
            try:
                self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                                 torrent, version_id)
                # Download succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                self._check_final_md5(key, fp.name)
                if debug >= 1:
                    print('Resumable download complete.')
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
            except ResumableDownloadException as e:
                if (e.disposition ==
                        ResumableTransferDisposition.ABORT_CUR_PROCESS):
                    if debug >= 1:
                        print(
                            'Caught non-retryable ResumableDownloadException '
                            '(%s)' % e.message)
                    raise
                elif (e.disposition == ResumableTransferDisposition.ABORT):
                    if debug >= 1:
                        print(
                            'Caught non-retryable ResumableDownloadException '
                            '(%s); aborting and removing tracker file' %
                            e.message)
                    self._remove_tracker_file()
                    raise
                else:
                    if debug >= 1:
                        print(
                            'Caught ResumableDownloadException (%s) - will '
                            'retry' % e.message)

            # At this point we had a re-tryable failure; see if made progress.
            if get_cur_file_size(fp) > had_file_bytes_before_attempt:
                progress_less_iterations = 0
            else:
                progress_less_iterations += 1

            if progress_less_iterations > self.num_retries:
                # Don't retry any longer in the current process; without
                # this bound a persistent failure would loop forever.
                raise ResumableDownloadException(
                    'Too many resumable download attempts failed without '
                    'progress. You might try this download again later',
                    ResumableTransferDisposition.ABORT_CUR_PROCESS)

            # Close the key, in case a previous download died partway
            # through and left data in the underlying key HTTP buffer.
            # Closing may attempt a final read on a connection that is
            # already gone; the resulting IncompleteRead is safe to ignore.
            try:
                key.close()
            except httplib.IncompleteRead:
                pass

            # Exponential backoff before the next attempt.
            sleep_time_secs = 2 ** progress_less_iterations
            if debug >= 1:
                print('Got retryable failure (%d progress-less in a row).\n'
                      'Sleeping %d seconds before re-trying' %
                      (progress_less_iterations, sleep_time_secs))
            time.sleep(sleep_time_secs)
Example #27
0
    def __init__(self,
                 server,
                 aws_access_key_id=None,
                 aws_secret_access_key=None,
                 is_secure=True,
                 port=None,
                 proxy=None,
                 proxy_port=None,
                 proxy_user=None,
                 proxy_pass=None,
                 debug=0,
                 https_connection_factory=None):
        """
        Store the connection settings, resolve the credentials, build the
        HMAC objects used for signing and open the first HTTP connection.

        @type server: string
        @param server: The server to make the connection to

        @type aws_access_key_id: string
        @param aws_access_key_id: AWS Access Key ID (provided by Amazon)

        @type aws_secret_access_key: string
        @param aws_secret_access_key: Secret Access Key (provided by Amazon)

        @type is_secure: boolean
        @param is_secure: Whether the connection is over SSL

        @type port: integer
        @param port: The port to use to connect. When falsy, the port is
                     looked up in C{PORTS_BY_SECURITY} by C{is_secure}.

        @type proxy:
        @param proxy: Forwarded unchanged to C{handle_proxy}.

        @type proxy_port: int
        @param proxy_port: The port to use when connecting over a proxy

        @type proxy_user: string
        @param proxy_user: The username to connect with on the proxy

        @type proxy_pass: string
        @param proxy_pass: The password to use when connection over a proxy.

        @type debug: int
        @param debug: Debug level. When falsy, the C{debug} option of the
                      C{Boto} config section is used instead.

        @type https_connection_factory: list or tuple
        @param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.
        """

        self.num_retries = 5
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from httplib that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error,
                                socket.gaierror)
        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR, )
        if https_connection_factory is not None:
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]
        else:
            self.https_connection_factory = None
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.server = server
        if debug:
            self.debug = debug
        else:
            self.debug = config.getint('Boto', 'debug', debug)
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]

        # This unfortunate little hack can be attributed to a difference
        # in the 2.6 version of httplib.  In old versions, it would append
        # ":443" to the hostname sent in the Host header and so we needed
        # to make sure we did the same when calculating the signature.
        # From 2.6 on it no longer does that, so the check is "2.6 or
        # newer" rather than "exactly 2.6" -- otherwise 2.7+ would still
        # get ":443" appended here.
        if self.port == 80:
            self.server_name = server
        elif self.port == 443 and sys.version_info >= (2, 6):
            self.server_name = server
        else:
            self.server_name = '%s:%d' % (server, self.port)

        # Credential lookup order: explicit value, environment, config file.
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        # NOTE: when no source supplies a value the attribute is left unset,
        # so the hmac.new() call below fails with AttributeError.
        if aws_access_key_id:
            self.aws_access_key_id = aws_access_key_id
        elif 'AWS_ACCESS_KEY_ID' in os.environ:
            self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        elif config.has_option('Credentials', 'aws_access_key_id'):
            self.aws_access_key_id = config.get('Credentials',
                                                'aws_access_key_id')

        if aws_secret_access_key:
            self.aws_secret_access_key = aws_secret_access_key
        elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
            self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
        elif config.has_option('Credentials', 'aws_secret_access_key'):
            self.aws_secret_access_key = config.get('Credentials',
                                                    'aws_secret_access_key')

        # initialize an HMAC for signatures, make copies with each request
        self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
        if sha256:
            self.hmac_256 = hmac.new(self.aws_secret_access_key,
                                     digestmod=sha256)
        else:
            self.hmac_256 = None

        # cache up to 20 connections
        self._cache = boto.utils.LRUCache(20)
        self.refresh_http_connection(self.server, self.is_secure)
        self._last_rs = None
Example #28
0
    def send_file(self, key, fp, headers, cb=None, num_cb=10):
        """
        Upload a file to a key into a bucket on GS, using GS resumable upload
        protocol.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object to which data is to be uploaded

        :type fp: file-like object
        :param fp: The file pointer to upload. It must be seekable: its
            length is measured by seeking to the end, after which it is
            rewound to offset 0.

        :type headers: dict
        :param headers: The headers to pass along with the PUT request. A
            false value (None or an empty dict) is replaced by a new empty
            dict.

        :type cb: function
        :param cb: a callback function that will be called to report progress on
            the upload.  The callback should accept two integer parameters, the
            first representing the number of bytes that have been successfully
            transmitted to GS, and the second representing the total number of
            bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter, this parameter determines the granularity of the callback
            by defining the maximum number of times the callback will be called
            during the file transfer. Providing a negative integer will cause
            your callback to be called with each buffer read.

        Raises ResumableUploadException if a problem occurs during the transfer.
        """

        if not headers:
            headers = {}

        # Determine the total size by seeking to the end of the file, then
        # rewind so the upload starts from the first byte.
        fp.seek(0, os.SEEK_END)
        file_length = fp.tell()
        fp.seek(0)
        # Debug verbosity is taken from the connection that owns the bucket.
        debug = key.bucket.connection.debug

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 5.
        # Note that the resolved value is stored back on the instance.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 5)
        # NOTE(review): progress_less_iterations and
        # server_had_bytes_before_attempt are assigned but never read in the
        # visible portion of this method; the excerpt appears to be cut off
        # before the progress bookkeeping -- confirm against the full source.
        progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            server_had_bytes_before_attempt = self.server_has_bytes
            try:
                etag = self._attempt_resumable_upload(key, fp, file_length,
                                                      headers, cb, num_cb)
                # Upload succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                self._check_final_md5(key, etag)
                if debug >= 1:
                    print 'Resumable upload complete.'
                return
            except self.RETRYABLE_EXCEPTIONS, e:
                # Retryable failure: only report it; control then falls
                # through to the next loop iteration.
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
            except ResumableUploadException, e:
                if (e.disposition ==
                    ResumableTransferDisposition.ABORT_CUR_PROCESS):
                    # Give up in this process; the tracker file is left in
                    # place (it is not removed on this path).
                    if debug >= 1:
                        print('Caught non-retryable ResumableUploadException '
                              '(%s); aborting but retaining tracker file' %
                              e.message)
                    raise
                elif (e.disposition ==
                    ResumableTransferDisposition.ABORT):
                    # Give up for good: delete the tracker file before
                    # propagating the exception.
                    if debug >= 1:
                        print('Caught non-retryable ResumableUploadException '
                              '(%s); aborting and removing tracker file' %
                              e.message)
                    self._remove_tracker_file()
                    raise
                else:
                    # Any other disposition is treated as retryable.
                    if debug >= 1:
                        print('Caught ResumableUploadException (%s) - will '
                              'retry' % e.message)
Example #29
0
    def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
                 version_id=None, hash_algs=None):
        """
        Retrieves a file from a Key, retrying the resumable download while
        retryable errors occur.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object from which data is to be downloaded

        :type fp: file
        :param fp: File pointer into which data should be downloaded

        :type headers: dict
        :param headers: headers to send when retrieving the file. A false
            value (None or an empty dict) is replaced by a new empty dict.

        :type cb: function
        :param cb: (optional) a callback function that will be called to report
             progress on the download.  The callback should accept two integer
             parameters, the first representing the number of bytes that have
             been successfully transmitted from the storage service and
             the second representing the total number of bytes that need
             to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
             parameter this parameter determines the granularity of the callback
             by defining the maximum number of times the callback will be
             called during the file transfer.

        :type torrent: bool
        :param torrent: Flag for whether to get a torrent for the file

        :type version_id: string
        :param version_id: The version ID (optional)

        :type hash_algs: dictionary
        :param hash_algs: (optional) Dictionary of hash algorithms and
            corresponding hashing class that implements update() and digest().
            Defaults to {'md5': hashlib/md5.md5}.

        Raises ResumableDownloadException if a problem occurs during
            the transfer.
        """

        # Debug verbosity is taken from the connection that owns the bucket.
        debug = key.bucket.connection.debug
        if not headers:
            headers = {}

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 5.
        # Note that the resolved value is stored back on the instance.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 5)
        # NOTE(review): progress_less_iterations and
        # had_file_bytes_before_attempt are assigned but never read in the
        # visible portion of this method; the excerpt appears to be cut off
        # before the progress bookkeeping -- confirm against the full source.
        progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            had_file_bytes_before_attempt = get_cur_file_size(fp)
            try:
                self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                                 torrent, version_id, hash_algs)
                # Download succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                # Previously, check_final_md5() was called here to validate
                # downloaded file's checksum, however, to be consistent with
                # non-resumable downloads, this call was removed. Checksum
                # validation of file contents should be done by the caller.
                if debug >= 1:
                    print 'Resumable download complete.'
                return
            except self.RETRYABLE_EXCEPTIONS, e:
                # Retryable failure: report it, apply the broken-pipe
                # workaround if needed, then fall through to the next loop
                # iteration.
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # Broken pipe error causes httplib to immediately
                    # close the socket (http://bugs.python.org/issue5542),
                    # so we need to close and reopen the key before resuming
                    # the download.
                    key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                                 override_num_retries=0, hash_algs=hash_algs)
            except ResumableDownloadException, e:
                if (e.disposition ==
                    ResumableTransferDisposition.ABORT_CUR_PROCESS):
                    # Give up in this process; the tracker file is not
                    # removed on this path.
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s)' % e.message)
                    raise
                elif (e.disposition ==
                    ResumableTransferDisposition.ABORT):
                    # Give up for good: delete the tracker file before
                    # propagating the exception.
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s); aborting and removing tracker file' %
                              e.message)
                    self._remove_tracker_file()
                    raise
                else:
                    # Any other disposition is treated as retryable.
                    if debug >= 1:
                        print('Caught ResumableDownloadException (%s) - will '
                              'retry' % e.message)
Example #30
0
    def _mexe(self,
              request,
              sender=None,
              override_num_retries=None,
              retry_handler=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!

        """
        boto.log.debug('Method: %s' % request.method)
        boto.log.debug('Path: %s' % request.path)
        boto.log.debug('Data: %s' % request.body)
        boto.log.debug('Headers: %s' % request.headers)
        boto.log.debug('Host: %s' % request.host)
        response = None
        body = None
        e = None
        if override_num_retries is None:
            num_retries = config.getint('Boto', 'num_retries',
                                        self.num_retries)
        else:
            num_retries = override_num_retries
        i = 0
        connection = self.get_http_connection(request.host, self.is_secure)
        while i <= num_retries:
            # Use binary exponential backoff to desynchronize client requests
            next_sleep = random.random() * (2**i)
            try:
                # we now re-sign each request before it is retried
                boto.log.debug('Token: %s' % self.provider.security_token)
                request.authorize(connection=self)
                if callable(sender):
                    response = sender(connection, request.method, request.path,
                                      request.body, request.headers)
                else:
                    connection.request(request.method, request.path,
                                       request.body, request.headers)
                    response = connection.getresponse()
                location = response.getheader('location')
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if request.method == 'HEAD' and getattr(
                        response, 'chunked', False):
                    response.chunked = 0
                if callable(retry_handler):
                    status = retry_handler(response, i, next_sleep)
                    if status:
                        msg, i, next_sleep = status
                        if msg:
                            boto.log.debug(msg)
                        time.sleep(next_sleep)
                        continue
                if response.status == 500 or response.status == 503:
                    msg = 'Received %d response.  ' % response.status
                    msg += 'Retrying in %3.1f seconds' % next_sleep
                    boto.log.debug(msg)
                    body = response.read()
                elif response.status < 300 or response.status >= 400 or \
                        not location:
                    self.put_http_connection(request.host, self.is_secure,
                                             connection)
                    return response
                else:
                    scheme, request.host, request.path, \
                        params, query, fragment = urlparse.urlparse(location)
                    if query:
                        request.path += '?' + query
                    msg = 'Redirecting: %s' % scheme + '://'
                    msg += request.host + request.path
                    boto.log.debug(msg)
                    connection = self.get_http_connection(
                        request.host, scheme == 'https')
                    continue
            except self.http_exceptions, e:
                for unretryable in self.http_unretryable_exceptions:
                    if isinstance(e, unretryable):
                        boto.log.debug(
                            'encountered unretryable %s exception, re-raising'
                            % e.__class__.__name__)
                        raise e
                boto.log.debug('encountered %s exception, reconnecting' % \
                                  e.__class__.__name__)
                connection = self.new_http_connection(request.host,
                                                      self.is_secure)
            time.sleep(next_sleep)
            i += 1
Example #31
0
    def _mexe(self,
              method,
              path,
              data,
              headers,
              host=None,
              sender=None,
              override_num_retries=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!
        """
        boto.log.debug('Method: %s' % method)
        boto.log.debug('Path: %s' % path)
        boto.log.debug('Data: %s' % data)
        boto.log.debug('Headers: %s' % headers)
        boto.log.debug('Host: %s' % host)
        response = None
        body = None
        e = None
        if override_num_retries is None:
            num_retries = config.getint('Boto', 'num_retries',
                                        self.num_retries)
        else:
            num_retries = override_num_retries
        i = 0
        connection = self.get_http_connection(host, self.is_secure)
        while i <= num_retries:
            try:
                if callable(sender):
                    response = sender(connection, method, path, data, headers)
                else:
                    connection.request(method, path, data, headers)
                    response = connection.getresponse()
                location = response.getheader('location')
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if method == 'HEAD' and getattr(response, 'chunked', False):
                    response.chunked = 0
                if response.status == 500 or response.status == 503:
                    boto.log.debug(
                        'received %d response, retrying in %d seconds' %
                        (response.status, 2**i))
                    body = response.read()
                elif response.status == 408:
                    body = response.read()
                    print '-------------------------'
                    print '         4 0 8           '
                    print 'path=%s' % path
                    print body
                    print '-------------------------'
                elif response.status < 300 or response.status >= 400 or \
                        not location:
                    self.put_http_connection(host, self.is_secure, connection)
                    return response
                else:
                    scheme, host, path, params, query, fragment = \
                            urlparse.urlparse(location)
                    if query:
                        path += '?' + query
                    boto.log.debug('Redirecting: %s' % scheme + '://' + host +
                                   path)
                    connection = self.get_http_connection(
                        host, scheme == 'https')
                    continue
            except KeyboardInterrupt:
                sys.exit('Keyboard Interrupt')
            except self.http_exceptions, e:
                for unretryable in self.http_unretryable_exceptions:
                    if isinstance(e, unretryable):
                        boto.log.debug(
                            'encountered unretryable %s exception, re-raising'
                            % e.__class__.__name__)
                        raise e
                boto.log.debug('encountered %s exception, reconnecting' % \
                                  e.__class__.__name__)
                connection = self.new_http_connection(host, self.is_secure)
            time.sleep(2**i)
            i += 1
Example #32
0
    def __init__(self,
                 host,
                 aws_access_key_id=None,
                 aws_secret_access_key=None,
                 is_secure=True,
                 port=None,
                 proxy=None,
                 proxy_port=None,
                 proxy_user=None,
                 proxy_pass=None,
                 debug=0,
                 https_connection_factory=None,
                 path='/',
                 provider='aws'):
        """
        Set up connection settings, credentials and the connection pool.

        :type host: str
        :param host: The host to make the connection to. Replaced by the
            provider's host when the provider defines one.

        :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
            Amazon). If none is specified, the value in your
            ``AWS_ACCESS_KEY_ID`` environment variable is used.
        :keyword str aws_secret_access_key: Your AWS Secret Access Key
            (provided by Amazon). If none is specified, the value in your
            ``AWS_SECRET_ACCESS_KEY`` environment variable is used.

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL. Overridden by
            the ``is_secure`` option of the ``Boto`` config section when
            that option is present.

        :type port: int
        :param port: The port to use to connect. When false, the port is
            looked up in ``PORTS_BY_SECURITY`` using ``is_secure``.

        :param str proxy: Address/hostname for a proxy server

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: str
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: str
        :param proxy_pass: The password to use when connecting over a proxy.

        :type debug: int
        :param debug: Debug level. When false, the ``debug`` option of the
            ``Boto`` config section is used instead.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.
                                         The exceptions may be given as any
                                         iterable of exception classes.

        :type path: str
        :param path: Stored as ``self.path``.

        :type provider: str
        :param provider: Provider name handed to ``Provider`` together with
            the credentials.

        Raises BotoClientError if certificate validation is enabled in the
        configuration but HAVE_HTTPS_CONNECTION is false.
        """
        self.num_retries = 5
        # Override passed-in is_secure setting if value was defined in config.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure
        # Whether or not to validate server certificates.  At some point in the
        # future, the default should be flipped to true.
        self.https_validate_certificates = config.getbool(
            'Boto', 'https_validate_certificates', False)
        if self.https_validate_certificates and not HAVE_HTTPS_CONNECTION:
            raise BotoClientError(
                "SSL server certificate validation is enabled in boto "
                "configuration, but Python dependencies required to "
                "support this feature are not available. Certificate "
                "validation is only supported when running under Python "
                "2.6 or later.")
        self.ca_certificates_file = config.get_value('Boto',
                                                     'ca_certificates_file',
                                                     DEFAULT_CA_CERTS_FILE)
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from httplib that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error,
                                socket.gaierror)
        # define subclasses of the above that are not retryable.
        self.http_unretryable_exceptions = []
        if HAVE_HTTPS_CONNECTION:
            self.http_unretryable_exceptions.append(ssl.SSLError)
            self.http_unretryable_exceptions.append(
                https_connection.InvalidCertificateException)

        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR, )
        if https_connection_factory is not None:
            self.https_connection_factory = https_connection_factory[0]
            # Coerce to a tuple so the exceptions may be supplied as a list
            # (tuple += list would raise TypeError).
            self.http_exceptions += tuple(https_connection_factory[1])
        else:
            self.https_connection_factory = None
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.host = host
        self.path = path
        if debug:
            self.debug = debug
        else:
            self.debug = config.getint('Boto', 'debug', debug)
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]

        # Timeout used to tell httplib how long to wait for socket timeouts.
        # Default is to leave timeout unchanged, which will in turn result in
        # the socket's default global timeout being used. To specify a
        # timeout, set http_socket_timeout in Boto config. Regardless,
        # timeouts will only be applied if Python is 2.6 or greater.
        self.http_connection_kwargs = {}
        if sys.version_info[:2] >= (2, 6):
            if config.has_option('Boto', 'http_socket_timeout'):
                timeout = config.getint('Boto', 'http_socket_timeout')
                self.http_connection_kwargs['timeout'] = timeout

        self.provider = Provider(provider, aws_access_key_id,
                                 aws_secret_access_key)

        # allow config file to override default host
        if self.provider.host:
            self.host = self.provider.host

        # cache up to 20 connections per host, up to 20 hosts
        self._pool = ConnectionPool(20, 20)
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
        # NOTE(review): the auth handler is looked up with the host argument
        # as passed in, not self.host (which the provider may have replaced
        # above) -- confirm this is intended.
        self._auth_handler = auth.get_auth_handler(
            host, config, self.provider, self._required_auth_capability())
Example #33
0
def ResumableThreshold():
    """Return the integer ``resumable_threshold`` option of the ``GSUtil``
    config section, falling back to ``TWO_MB`` when it is not set.

    Presumably the size above which resumable transfers are used -- confirm
    against callers.
    """
    threshold = config.getint('GSUtil', 'resumable_threshold', TWO_MB)
    return threshold
Example #34
0
    def _mexe(self, request, sender=None, override_num_retries=None, retry_handler=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!

        """
        boto.log.debug("Method: %s" % request.method)
        boto.log.debug("Path: %s" % request.path)
        boto.log.debug("Data: %s" % request.body)
        boto.log.debug("Headers: %s" % request.headers)
        boto.log.debug("Host: %s" % request.host)
        boto.log.debug("Params: %s" % request.params)
        response = None
        body = None
        e = None
        if override_num_retries is None:
            num_retries = config.getint("Boto", "num_retries", self.num_retries)
        else:
            num_retries = override_num_retries
        i = 0
        connection = self.get_http_connection(request.host, self.is_secure)
        while i <= num_retries:
            # Use binary exponential backoff to desynchronize client requests.
            next_sleep = random.random() * (2 ** i)
            try:
                # we now re-sign each request before it is retried
                boto.log.debug("Token: %s" % self.provider.security_token)
                request.authorize(connection=self)
                if callable(sender):
                    response = sender(connection, request.method, request.path, request.body, request.headers)
                else:
                    connection.request(request.method, request.path, request.body, request.headers)
                    response = connection.getresponse()
                location = response.getheader("location")
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if request.method == "HEAD" and getattr(response, "chunked", False):
                    response.chunked = 0
                if callable(retry_handler):
                    status = retry_handler(response, i, next_sleep)
                    if status:
                        msg, i, next_sleep = status
                        if msg:
                            boto.log.debug(msg)
                        time.sleep(next_sleep)
                        continue
                if response.status == 500 or response.status == 503:
                    msg = "Received %d response.  " % response.status
                    msg += "Retrying in %3.1f seconds" % next_sleep
                    boto.log.debug(msg)
                    body = response.read()
                elif response.status < 300 or response.status >= 400 or not location:
                    self.put_http_connection(request.host, self.is_secure, connection)
                    return response
                else:
                    scheme, request.host, request.path, params, query, fragment = urlparse.urlparse(location)
                    if query:
                        request.path += "?" + query
                    msg = "Redirecting: %s" % scheme + "://"
                    msg += request.host + request.path
                    boto.log.debug(msg)
                    connection = self.get_http_connection(request.host, scheme == "https")
                    response = None
                    continue
            except self.http_exceptions, e:
                for unretryable in self.http_unretryable_exceptions:
                    if isinstance(e, unretryable):
                        boto.log.debug("encountered unretryable %s exception, re-raising" % e.__class__.__name__)
                        raise e
                boto.log.debug("encountered %s exception, reconnecting" % e.__class__.__name__)
                connection = self.new_http_connection(request.host, self.is_secure)
            time.sleep(next_sleep)
            i += 1
Example #35
0
    def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, path='/', provider='aws'):
        """
        Set up connection settings, credentials and the connection pool.

        :type host: str
        :param host: The host to make the connection to. Replaced by the
            provider's host when the provider defines one.

        :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
            Amazon). If none is specified, the value in your
            ``AWS_ACCESS_KEY_ID`` environment variable is used.
        :keyword str aws_secret_access_key: Your AWS Secret Access Key
            (provided by Amazon). If none is specified, the value in your
            ``AWS_SECRET_ACCESS_KEY`` environment variable is used.

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL. Overridden by
            the ``is_secure`` option of the ``Boto`` config section when
            that option is present.

        :type port: int
        :param port: The port to use to connect. When false, the port is
            looked up in ``PORTS_BY_SECURITY`` using ``is_secure``.

        :param str proxy: Address/hostname for a proxy server

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: str
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: str
        :param proxy_pass: The password to use when connecting over a proxy.

        :type debug: int
        :param debug: Debug level. When false, the ``debug`` option of the
            ``Boto`` config section is used instead.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.
                                         The exceptions may be given as any
                                         iterable of exception classes.

        :type path: str
        :param path: Stored as ``self.path``.

        :type provider: str
        :param provider: Provider name handed to ``Provider`` together with
            the credentials.

        Raises BotoClientError if certificate validation is enabled in the
        configuration but HAVE_HTTPS_CONNECTION is false.
        """
        self.num_retries = 5
        # Override passed-in is_secure setting if value was defined in config.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure
        # Whether or not to validate server certificates.  At some point in the
        # future, the default should be flipped to true.
        self.https_validate_certificates = config.getbool(
                'Boto', 'https_validate_certificates', False)
        if self.https_validate_certificates and not HAVE_HTTPS_CONNECTION:
            raise BotoClientError(
                    "SSL server certificate validation is enabled in boto "
                    "configuration, but Python dependencies required to "
                    "support this feature are not available. Certificate "
                    "validation is only supported when running under Python "
                    "2.6 or later.")
        self.ca_certificates_file = config.get_value(
                'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE)
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from httplib that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error,
                                socket.gaierror)
        # define subclasses of the above that are not retryable.
        self.http_unretryable_exceptions = []
        if HAVE_HTTPS_CONNECTION:
            self.http_unretryable_exceptions.append(ssl.SSLError)
            self.http_unretryable_exceptions.append(
                    https_connection.InvalidCertificateException)

        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR,)
        if https_connection_factory is not None:
            self.https_connection_factory = https_connection_factory[0]
            # Coerce to a tuple so the exceptions may be supplied as a list
            # (tuple += list would raise TypeError).
            self.http_exceptions += tuple(https_connection_factory[1])
        else:
            self.https_connection_factory = None
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.host = host
        self.path = path
        if debug:
            self.debug = debug
        else:
            self.debug = config.getint('Boto', 'debug', debug)
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]

        # Timeout used to tell httplib how long to wait for socket timeouts.
        # Default is to leave timeout unchanged, which will in turn result in
        # the socket's default global timeout being used. To specify a
        # timeout, set http_socket_timeout in Boto config. Regardless,
        # timeouts will only be applied if Python is 2.6 or greater.
        self.http_connection_kwargs = {}
        if sys.version_info[:2] >= (2, 6):
            if config.has_option('Boto', 'http_socket_timeout'):
                timeout = config.getint('Boto', 'http_socket_timeout')
                self.http_connection_kwargs['timeout'] = timeout

        self.provider = Provider(provider,
                                 aws_access_key_id,
                                 aws_secret_access_key)

        # allow config file to override default host
        if self.provider.host:
            self.host = self.provider.host

        self._pool = ConnectionPool()
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
        # NOTE(review): the auth handler is looked up with the host argument
        # as passed in, not self.host (which the provider may have replaced
        # above) -- confirm this is intended.
        self._auth_handler = auth.get_auth_handler(
              host, config, self.provider, self._required_auth_capability())
    def send_file(self, key, fp, headers, cb=None, num_cb=10):
        """
        Upload a file to a key into a bucket on GS, using GS resumable upload
        protocol.

        Upload attempts run inside a loop.  A successful attempt removes the
        tracker file, checks the final MD5 against the returned etag and
        returns.  Exceptions listed in RETRYABLE_EXCEPTIONS, and
        ResumableUploadExceptions whose disposition is neither ABORT nor
        ABORT_CUR_PROCESS, are reported (when debug >= 1) and swallowed so
        that the loop makes another attempt.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object to which data is to be uploaded

        :type fp: file-like object
        :param fp: The file pointer to upload.  It must be seekable: the
            length is measured by seeking to the end and back to the start.

        :type headers: dict
        :param headers: The headers to pass along with the PUT request

        :type cb: function
        :param cb: a callback function that will be called to report progress on
            the upload.  The callback should accept two integer parameters, the
            first representing the number of bytes that have been successfully
            transmitted to GS, and the second representing the total number of
            bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter, this parameter determines the granularity of the callback
            by defining the maximum number of times the callback will be called
            during the file transfer. Providing a negative integer will cause
            your callback to be called with each buffer read.

        Raises ResumableUploadException if a problem occurs during the transfer.
        """

        if not headers:
            headers = {}

        # Measure the payload size by seeking to the end, then rewind so the
        # upload starts from the first byte.
        fp.seek(0, os.SEEK_END)
        file_length = fp.tell()
        fp.seek(0)
        debug = key.bucket.connection.debug

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 5.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 5)
        progress_less_iterations = 0

        # NOTE(review): in the code visible here nothing after the except
        # clauses reads num_retries, progress_less_iterations or
        # server_had_bytes_before_attempt, so the loop has no retry bound or
        # back-off.  The snippet may be truncated -- confirm against the
        # full source.
        while True:  # Retry as long as we're making progress.
            server_had_bytes_before_attempt = self.server_has_bytes
            try:
                etag = self._attempt_resumable_upload(key, fp, file_length,
                                                      headers, cb, num_cb)
                # Upload succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                self._check_final_md5(key, etag)
                if debug >= 1:
                    print 'Resumable upload complete.'
                return
            except self.RETRYABLE_EXCEPTIONS, e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # Broken pipe error causes httplib to immediately
                    # close the socket (http://bugs.python.org/issue5542),
                    # so we need to close the connection before we resume
                    # the upload (which will cause a new connection to be
                    # opened the next time an HTTP request is sent).
                    key.bucket.connection.connection.close()
            except ResumableUploadException, e:
                if (e.disposition ==
                    ResumableTransferDisposition.ABORT_CUR_PROCESS):
                    # Give up in this process but keep the tracker file on
                    # disk, then propagate the exception to the caller.
                    if debug >= 1:
                        print('Caught non-retryable ResumableUploadException '
                              '(%s); aborting but retaining tracker file' %
                              e.message)
                    raise
                elif (e.disposition ==
                    ResumableTransferDisposition.ABORT):
                    # Give up for good: delete the tracker file before
                    # propagating the exception.
                    if debug >= 1:
                        print('Caught non-retryable ResumableUploadException '
                              '(%s); aborting and removing tracker file' %
                              e.message)
                    self._remove_tracker_file()
                    raise
                else:
                    # Any other disposition is treated as retryable: report
                    # it and fall through to the next loop iteration.
                    if debug >= 1:
                        print('Caught ResumableUploadException (%s) - will '
                              'retry' % e.message)
Example #37
0
    def __init__(self, host, aws_access_key_id=None,
                 aws_secret_access_key=None, is_secure=True, port=None,
                 proxy=None, proxy_port=None, proxy_user=None,
                 proxy_pass=None, debug=0, https_connection_factory=None,
                 path='/', provider='aws', security_token=None,
                 suppress_consec_slashes=True, validate_certs=True,
                 profile_name=None):
        """
        Configure transport, proxy, retry and credential state for a
        connection to ``host``.

        Several arguments act only as fallbacks: values found in the boto
        config file (``is_secure``, ``https_validate_certificates``,
        ``debug``) or on the provider (host, port, host header) take
        precedence when present.

        :type host: str
        :param host: The host to make the connection to

        :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
            Amazon). If none is specified, the value in your
            ``AWS_ACCESS_KEY_ID`` environmental variable is used.
        :keyword str aws_secret_access_key: Your AWS Secret Access Key
            (provided by Amazon). If none is specified, the value in your
            ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.
        :keyword str security_token: The security token associated with
            temporary credentials issued by STS.  Optional unless using
            temporary credentials.  If none is specified, the environment
            variable ``AWS_SECURITY_TOKEN`` is used if defined.

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL.  Replaced by
            the ``is_secure`` option of the ``Boto`` config section when
            that option exists.

        :type port: int
        :param port: The port to use to connect.  When falsy, the port is
            looked up in ``PORTS_BY_SECURITY`` using ``is_secure``.

        :param str proxy: Address/hostname for a proxy server

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: str
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: str
        :param proxy_pass: The password to use when connecting over a proxy.

        :type debug: int
        :param debug: Fallback debug level used when the ``Boto`` config
            section has no ``debug`` option.  Non-integer values are
            treated as 0.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
            factory and the exceptions to catch.  The factory should have
            a similar interface to L{http_client.HTTPSConnection}.

        :type path: str
        :param path: Stored as ``self.path``.

        :type provider: str or Provider
        :param provider: Either a ready-made Provider instance, which is
            used as is, or a provider type from which a Provider is built
            together with the credential arguments.

        :type suppress_consec_slashes: bool
        :param suppress_consec_slashes: If provided, controls whether
            consecutive slashes will be suppressed in key paths.

        :type validate_certs: bool
        :param validate_certs: Controls whether SSL certificates
            will be validated or not.  Defaults to True.

        :type profile_name: str
        :param profile_name: Override usual Credentials section in config
            file to use a named set of keys instead.
        """
        self.suppress_consec_slashes = suppress_consec_slashes
        self.num_retries = 6

        # An is_secure option in the config file wins over the argument.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure

        # Certificate validation is on unless the config file or the
        # validate_certs argument says otherwise.
        self.https_validate_certificates = config.getbool(
            'Boto', 'https_validate_certificates', validate_certs)
        if self.https_validate_certificates and not HAVE_HTTPS_CONNECTION:
            unsupported_msg = (
                "SSL server certificate validation is enabled in boto "
                "configuration, but Python dependencies required to "
                "support this feature are not available. Certificate "
                "validation is only supported when running under Python "
                "2.6 or later.")
            raise BotoClientError(unsupported_msg)

        # The literal value 'system' means "no explicit CA bundle".
        ca_file = config.get_value(
            'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE)
        self.ca_certificates_file = None if ca_file == 'system' else ca_file

        self.port = port if port else PORTS_BY_SECURITY[is_secure]

        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)

        # Exceptions from http_client that are caught and retried ...
        self.http_exceptions = (http_client.HTTPException, socket.error,
                                socket.gaierror, http_client.BadStatusLine)
        # ... and the subclasses of those that must not be retried.
        self.http_unretryable_exceptions = (
            [https_connection.InvalidCertificateException]
            if HAVE_HTTPS_CONNECTION else [])

        # Values in socket exceptions we don't want to catch.
        self.socket_exception_values = (errno.EINTR,)

        if https_connection_factory is None:
            self.https_connection_factory = None
        else:
            # The pair is (factory, extra exceptions to catch).
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]

        self.protocol = 'https' if is_secure else 'http'
        self.host = host
        self.path = path

        # Only an integer is accepted as the fallback debug level.
        fallback_debug = debug if isinstance(debug, six.integer_types) else 0
        self.debug = config.getint('Boto', 'debug', fallback_debug)
        self.host_header = None

        # Socket timeout handed to http_client.  It is only applied on
        # Python 2.6 or greater; the value comes from http_socket_timeout in
        # the Boto config and defaults to 70 seconds, as recommended by
        # http://docs.aws.amazon.com/amazonswf/latest/apireference/API_PollForActivityTask.html
        self.http_connection_kwargs = {}
        if sys.version_info[:2] >= (2, 6):
            self.http_connection_kwargs['timeout'] = config.getint(
                'Boto', 'http_socket_timeout', 70)

        if not isinstance(provider, Provider):
            # Build a Provider from the given type and credentials.
            self._provider_type = provider
            self.provider = Provider(self._provider_type,
                                     aws_access_key_id,
                                     aws_secret_access_key,
                                     security_token,
                                     profile_name)
        else:
            # A ready-made Provider overrides everything above.
            self.provider = provider

        # Host, port and host header configured on the provider replace the
        # defaults chosen so far.
        configured = self.provider
        if configured.host:
            self.host = configured.host
        if configured.port:
            self.port = configured.port
        if configured.host_header:
            self.host_header = configured.host_header

        self._pool = ConnectionPool()
        self._connection = (self.host, self.port, self.is_secure)
        self._last_rs = None
        self._auth_handler = auth.get_auth_handler(
            host, config, self.provider, self._required_auth_capability())

        service_name = getattr(self, 'AuthServiceName', None)
        if service_name is not None:
            self.auth_service_name = service_name
        self.request_hook = None
Example #38
0
def GetMaxRetryDelay():
  """Returns the 'max_retry_delay' option of the [Boto] config section.

  The value is read as an integer; 60 is used when the option is not set.
  """
  max_delay = config.getint('Boto', 'max_retry_delay', 60)
  return max_delay
Example #39
0
def ResumableThreshold():
  """Returns the 'resumable_threshold' option of the [GSUtil] config section.

  The value is read as an integer; TWO_MB is used when the option is not set.
  """
  threshold = config.getint('GSUtil', 'resumable_threshold', TWO_MB)
  return threshold
Example #40
0
def ResumableThreshold():
  """Returns the 'resumable_threshold' option of the [GSUtil] config section.

  The value is read as an integer; EIGHT_MIB is used when the option is not
  set.
  """
  threshold = config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
  return threshold
Example #41
0
    def _mexe(self, method, path, data, headers, host=None, sender=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        The request is sent either through ``sender`` (when it is callable)
        or directly on the HTTP connection.  Responses with status 500, 503
        or 408, and exceptions listed in ``self.http_exceptions``, trigger
        another attempt after sleeping ``2**i`` seconds, where ``i`` is the
        attempt counter.  A 3xx response carrying a Location header is
        followed on a connection to the new host.  Any other response is
        returned to the caller.

        The retry limit is the ``num_retries`` option of the ``Boto`` config
        section, falling back to ``self.num_retries``.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!
        """
        boto.log.debug('Method: %s' % method)
        boto.log.debug('Path: %s' % path)
        boto.log.debug('Data: %s' % data)
        boto.log.debug('Headers: %s' % headers)
        boto.log.debug('Host: %s' % host)
        response = None
        body = None
        e = None
        num_retries = config.getint('Boto', 'num_retries', self.num_retries)
        i = 0
        connection = self.get_http_connection(host, self.is_secure)
        # NOTE(review): when the loop below runs out of attempts, nothing
        # visible here raises or returns, so the method would return None.
        # The snippet may be truncated -- confirm against the full source.
        while i <= num_retries:
            try:
                if callable(sender):
                    response = sender(connection, method, path, data, headers)
                else:
                    connection.request(method, path, data, headers)
                    response = connection.getresponse()
                location = response.getheader('location')
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if method == 'HEAD' and response.chunked:
                    response.chunked = 0
                if response.status == 500 or response.status == 503:
                    # Server-side failure: drain the body, then fall through
                    # to the sleep and retry at the bottom of the loop.
                    boto.log.debug('received %d response, retrying in %d seconds' % (response.status, 2**i))
                    body = response.read()
                elif response.status == 408:
                    # Request timeout: dump diagnostics to stdout, then
                    # fall through to the sleep and retry.
                    body = response.read()
                    print '-------------------------'
                    print '         4 0 8           '
                    print 'path=%s' % path
                    print body
                    print '-------------------------'
                elif response.status < 300 or response.status >= 400 or \
                        not location:
                    # Not a followable redirect: hand the response back.
                    return response
                else:
                    # 3xx with a Location header: rebuild the target from
                    # the URL and reissue the request against the new host.
                    scheme, host, path, params, query, fragment = \
                            urlparse.urlparse(location)
                    if query:
                        path += '?' + query
                    boto.log.debug('Redirecting: %s' % scheme + '://' + host + path)
                    connection = self.get_http_connection(host,
                            scheme == 'https')
                    # NOTE(review): this skips both the sleep and the
                    # increment of i, so redirects are not counted against
                    # num_retries.
                    continue
            except KeyboardInterrupt:
                sys.exit('Keyboard Interrupt')
            except self.http_exceptions, e:
                boto.log.debug('encountered %s exception, reconnecting' % \
                                  e.__class__.__name__)
                connection = self.refresh_http_connection(host, self.is_secure)
            # Exponential back-off before the next attempt.
            time.sleep(2**i)
            i += 1
Example #42
0
    def _mexe(self, request, sender=None, override_num_retries=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        ``request`` is re-authorized before every attempt and sent either
        through ``sender`` (when it is callable) or directly on the HTTP
        connection.  Responses with status 500 or 503, and retryable
        exceptions from ``self.http_exceptions``, trigger another attempt
        after sleeping ``2 ** i`` seconds, where ``i`` is the attempt
        counter.  A 3xx response carrying a Location header is followed by
        rewriting ``request.host`` / ``request.path``.  Any other response
        is returned after the connection is handed to
        ``put_http_connection``.

        The retry limit is ``override_num_retries`` when given, otherwise
        the ``num_retries`` option of the ``Boto`` config section, falling
        back to ``self.num_retries``.  Exceptions that are instances of an
        entry in ``self.http_unretryable_exceptions`` are re-raised
        immediately.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!
        """
        boto.log.debug('Method: %s' % request.method)
        boto.log.debug('Path: %s' % request.path)
        boto.log.debug('Data: %s' % request.body)
        boto.log.debug('Headers: %s' % request.headers)
        boto.log.debug('Host: %s' % request.host)
        response = None
        body = None
        e = None
        if override_num_retries is None:
            num_retries = config.getint('Boto', 'num_retries', self.num_retries)
        else:
            num_retries = override_num_retries
        i = 0
        connection = self.get_http_connection(request.host, self.is_secure)
        # NOTE(review): when the loop below runs out of attempts, nothing
        # visible here raises or returns, so the method would return None.
        # The snippet may be truncated -- confirm against the full source.
        while i <= num_retries:
            try:
                # we now re-sign each request before it is retried
                request.authorize(connection=self)
                if callable(sender):
                    response = sender(connection, request.method, request.path,
                                      request.body, request.headers)
                else:
                    connection.request(request.method, request.path, request.body,
                                       request.headers)
                    response = connection.getresponse()
                location = response.getheader('location')
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if request.method == 'HEAD' and getattr(response, 'chunked', False):
                    response.chunked = 0
                if response.status == 500 or response.status == 503:
                    # Server-side failure: drain the body, then fall through
                    # to the sleep and retry at the bottom of the loop.
                    boto.log.debug('received %d response, retrying in %d seconds' % (response.status, 2 ** i))
                    body = response.read()
                elif response.status < 300 or response.status >= 400 or \
                        not location:
                    # Not a followable redirect: hand the connection back
                    # via put_http_connection and return the response.
                    self.put_http_connection(request.host, self.is_secure, connection)
                    return response
                else:
                    # 3xx with a Location header: point the request at the
                    # new host/path and reissue it.
                    scheme, request.host, request.path, params, query, fragment = \
                            urlparse.urlparse(location)
                    if query:
                        request.path += '?' + query
                    boto.log.debug('Redirecting: %s' % scheme + '://' + request.host + request.path)
                    connection = self.get_http_connection(request.host, scheme == 'https')
                    # NOTE(review): this skips both the sleep and the
                    # increment of i, so redirects are not counted against
                    # num_retries.
                    continue
            except KeyboardInterrupt:
                sys.exit('Keyboard Interrupt')
            except self.http_exceptions, e:
                # Some subclasses of the retryable exceptions must propagate
                # unchanged instead of being retried.
                for unretryable in self.http_unretryable_exceptions:
                    if isinstance(e, unretryable):
                        boto.log.debug(
                            'encountered unretryable %s exception, re-raising' %
                            e.__class__.__name__)
                        raise e
                boto.log.debug('encountered %s exception, reconnecting' % \
                                  e.__class__.__name__)
                connection = self.new_http_connection(request.host, self.is_secure)
            # Exponential back-off before the next attempt.
            time.sleep(2 ** i)
            i += 1
Example #43
0
    def __init__(self, server, aws_access_key_id=None,
                 aws_secret_access_key=None, is_secure=True, port=None,
                 proxy=None, proxy_port=None, proxy_user=None,
                 proxy_pass=None, debug=0, https_connection_factory=None):
        """
        Set up transport settings, credentials and signing state for a
        connection to ``server``.

        Each credential is resolved in this order: the constructor
        argument, then the environment variable (``AWS_ACCESS_KEY_ID`` /
        ``AWS_SECRET_ACCESS_KEY``), then the ``Credentials`` section of the
        boto config file.

        @type server: string
        @param server: The server to make the connection to

        @type aws_access_key_id: string
        @param aws_access_key_id: AWS Access Key ID (provided by Amazon)

        @type aws_secret_access_key: string
        @param aws_secret_access_key: Secret Access Key (provided by Amazon)

        @type is_secure: boolean
        @param is_secure: Whether the connection is over SSL

        @type https_connection_factory: list or tuple
        @param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.

        @type proxy: string
        @param proxy: Proxy server to connect through; handed to
                      handle_proxy together with the other proxy arguments.

        @type proxy_port: int
        @param proxy_port: The port to use when connecting over a proxy

        @type proxy_user: string
        @param proxy_user: The username to connect with on the proxy

        @type proxy_pass: string
        @param proxy_pass: The password to use when connecting over a proxy.

        @type debug: integer
        @param debug: Debug level.  When falsy, the ``debug`` option of the
                      ``Boto`` config section is used instead.

        @type port: integer
        @param port: The port to use to connect.  When falsy, the port is
                     looked up in PORTS_BY_SECURITY using is_secure.
        """

        self.num_retries = 5
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from httplib that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error, socket.gaierror)
        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR,)
        if https_connection_factory is not None:
            # The pair is (factory, extra exceptions to catch).
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]
        else:
            self.https_connection_factory = None
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.server = server
        # An explicit, truthy debug argument wins over the config file.
        if debug:
            self.debug = debug
        else:
            self.debug = config.getint('Boto', 'debug', debug)
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]
        # The port is appended to the server name except for port 80.
        if self.port == 80:
            self.server_name = server
        else:
            self.server_name = '%s:%d' % (server, self.port)

        # Membership tests use ``in`` rather than the deprecated
        # dict.has_key(), which no longer exists on Python 3.
        # NOTE(review): if a credential is found in none of the three
        # places, the attribute is never assigned and the hmac setup below
        # fails with AttributeError.
        if aws_access_key_id:
            self.aws_access_key_id = aws_access_key_id
        elif 'AWS_ACCESS_KEY_ID' in os.environ:
            self.aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        elif config.has_option('Credentials', 'aws_access_key_id'):
            self.aws_access_key_id = config.get('Credentials', 'aws_access_key_id')

        if aws_secret_access_key:
            self.aws_secret_access_key = aws_secret_access_key
        elif 'AWS_SECRET_ACCESS_KEY' in os.environ:
            self.aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
        elif config.has_option('Credentials', 'aws_secret_access_key'):
            self.aws_secret_access_key = config.get('Credentials', 'aws_secret_access_key')

        # initialize an HMAC for signatures, make copies with each request
        self.hmac = hmac.new(self.aws_secret_access_key, digestmod=sha)
        # A second HMAC using sha256 is kept only when sha256 is available.
        if sha256:
            self.hmac_256 = hmac.new(self.aws_secret_access_key, digestmod=sha256)
        else:
            self.hmac_256 = None

        # cache up to 20 connections
        self._cache = boto.utils.LRUCache(20)
        self.refresh_http_connection(self.server, self.is_secure)
        self._last_rs = None
Example #44
0
    def _mexe(self, request, sender=None, override_num_retries=None,
              retry_handler=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!

        :param request: The request to send.  It is re-authorized before
            every attempt; its ``host``, ``port`` and ``path`` are rewritten
            when a redirect is followed, and a text body is encoded to
            UTF-8 bytes.

        :type sender: callable
        :param sender: (optional) Called as ``sender(connection, method,
            path, body, headers)`` to perform the request and return the
            response, instead of using the connection directly.

        :type override_num_retries: int
        :param override_num_retries: (optional) Retry limit to use instead
            of the ``num_retries`` option of the ``Boto`` config section
            (which itself falls back to ``self.num_retries``).

        :type retry_handler: callable
        :param retry_handler: (optional) Called as ``retry_handler(response,
            i, next_sleep)`` after each response.  A truthy result must be a
            ``(msg, i, next_sleep)`` triple; the loop then logs ``msg`` (if
            any), sleeps ``next_sleep`` seconds and retries with the
            returned attempt counter.

        :return: The first response that is neither a retryable server
            error (500, 502, 503, 504) nor a followable redirect.

        :raises BotoServerError: when retries are exhausted and a response
            is available.
        :raises BotoClientError: when retries are exhausted with neither a
            response nor a recorded exception.

        When retries are exhausted without a response, the last retryable
        exception that was caught is re-raised.  Unretryable exceptions
        propagate immediately.
        """
        boto.log.debug('Method: %s' % request.method)
        boto.log.debug('Path: %s' % request.path)
        boto.log.debug('Data: %s' % request.body)
        boto.log.debug('Headers: %s' % request.headers)
        boto.log.debug('Host: %s' % request.host)
        boto.log.debug('Port: %s' % request.port)
        boto.log.debug('Params: %s' % request.params)
        response = None
        body = None
        # Last retryable exception seen.  It needs its own name because the
        # target of ``except ... as e`` is unbound when the handler exits on
        # Python 3, so ``e`` cannot be inspected after the loop.
        last_exception = None
        if override_num_retries is None:
            num_retries = config.getint('Boto', 'num_retries', self.num_retries)
        else:
            num_retries = override_num_retries
        i = 0
        connection = self.get_http_connection(request.host, request.port,
                                              self.is_secure)

        # Convert body to bytes if needed
        if not isinstance(request.body, bytes) and hasattr(request.body,
                                                           'encode'):
            request.body = request.body.encode('utf-8')

        # Upper bound for a single sleep.  It is read with getint so that a
        # value coming from the config file is compared to the computed
        # delay as a number.
        max_retry_delay = boto.config.getint('Boto', 'max_retry_delay', 60)

        while i <= num_retries:
            # Use binary exponential backoff to desynchronize client requests.
            next_sleep = min(random.random() * (2 ** i), max_retry_delay)
            try:
                # we now re-sign each request before it is retried
                boto.log.debug('Token: %s' % self.provider.security_token)
                request.authorize(connection=self)
                # Only force header for non-s3 connections, because s3 uses
                # an older signing method + bucket resource URLs that include
                # the port info. All others should be now be up to date and
                # not include the port.
                if 's3' not in self._required_auth_capability():
                    if not getattr(self, 'anon', False):
                        self.set_host_header(request)
                boto.log.debug('Final headers: %s' % request.headers)
                request.start_time = datetime.now()
                if callable(sender):
                    response = sender(connection, request.method, request.path,
                                      request.body, request.headers)
                else:
                    connection.request(request.method, request.path,
                                       request.body, request.headers)
                    response = connection.getresponse()
                boto.log.debug('Response headers: %s' % response.getheaders())
                location = response.getheader('location')
                # -- gross hack --
                # http_client gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if request.method == 'HEAD' and getattr(response,
                                                        'chunked', False):
                    response.chunked = 0
                if callable(retry_handler):
                    # The handler may take over the retry decision, including
                    # the attempt counter and the delay.
                    status = retry_handler(response, i, next_sleep)
                    if status:
                        msg, i, next_sleep = status
                        if msg:
                            boto.log.debug(msg)
                        time.sleep(next_sleep)
                        continue
                if response.status in [500, 502, 503, 504]:
                    # Retryable server error: keep the body for the final
                    # BotoServerError and fall through to sleep and retry.
                    msg = 'Received %d response.  ' % response.status
                    msg += 'Retrying in %3.1f seconds' % next_sleep
                    boto.log.debug(msg)
                    body = response.read()
                    if isinstance(body, bytes):
                        body = body.decode('utf-8')
                elif response.status < 300 or response.status >= 400 or \
                        not location:
                    # don't return connection to the pool if response contains
                    # Connection:close header, because the connection has been
                    # closed and default reconnect behavior may do something
                    # different than new_http_connection. Also, it's probably
                    # less efficient to try to reuse a closed connection.
                    conn_header_value = response.getheader('connection')
                    if conn_header_value == 'close':
                        connection.close()
                    else:
                        self.put_http_connection(request.host, request.port,
                                                 self.is_secure, connection)
                    if self.request_hook is not None:
                        self.request_hook.handle_request_data(request, response)
                    return response
                else:
                    # 3xx with a Location header: point the request at the
                    # redirect target and reissue it.
                    scheme, request.host, request.path, \
                        params, query, fragment = urlparse(location)
                    if query:
                        request.path += '?' + query
                    # urlparse can return both host and port in netloc, so if
                    # that's the case we need to split them up properly
                    if ':' in request.host:
                        request.host, request.port = request.host.split(':', 1)
                    msg = 'Redirecting: %s' % scheme + '://'
                    msg += request.host + request.path
                    boto.log.debug(msg)
                    connection = self.get_http_connection(request.host,
                                                          request.port,
                                                          scheme == 'https')
                    response = None
                    continue
            except PleaseRetryException as e:
                boto.log.debug('encountered a retry exception: %s' % e)
                connection = self.new_http_connection(request.host, request.port,
                                                      self.is_secure)
                response = e.response
                last_exception = e
            except self.http_exceptions as e:
                for unretryable in self.http_unretryable_exceptions:
                    if isinstance(e, unretryable):
                        boto.log.debug(
                            'encountered unretryable %s exception, re-raising' %
                            e.__class__.__name__)
                        raise
                boto.log.debug('encountered %s exception, reconnecting' % \
                                  e.__class__.__name__)
                connection = self.new_http_connection(request.host, request.port,
                                                      self.is_secure)
                last_exception = e
            time.sleep(next_sleep)
            i += 1
        # If we made it here, it's because we have exhausted our retries
        # and still haven't succeeded.  So, if we have a response object,
        # use it to raise an exception.
        # Otherwise, raise the exception that must have already happened.
        if self.request_hook is not None:
            self.request_hook.handle_request_data(request, response, error=True)
        if response:
            raise BotoServerError(response.status, response.reason, body)
        elif last_exception is not None:
            # Raised explicitly: a bare ``raise`` has no active exception to
            # re-raise once the handler has exited.
            raise last_exception
        else:
            msg = 'Please report this exception as a Boto Issue!'
            raise BotoClientError(msg)
    def send_file(self, key, fp, headers, cb=None, num_cb=10):
        """
        Upload a file to a key into a bucket on GS, using GS resumable upload
        protocol.

        An MD5 of the content is accumulated in ``self.md5sum`` while
        uploading.  On success the digest is stored on ``key`` (``md5`` and
        ``base64md5``), the values returned with the etag are stored in
        ``self.generation`` and ``self.meta_generation``, the tracker file
        is removed and the final MD5 is checked against the etag.

        Exceptions listed in RETRYABLE_EXCEPTIONS are reported (when
        debug >= 1) and the loop makes another attempt.
        ResumableUploadExceptions are passed to
        ``handle_resumable_upload_exception``.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object to which data is to be uploaded

        :type fp: file-like object
        :param fp: The file pointer to upload.  Either a KeyFile, whose size
            is taken from its key, or a seekable file whose size is measured
            by seeking to the end and back to the start.

        :type headers: dict
        :param headers: The headers to pass along with the PUT request.  A
            ``Content-Type`` entry set to None is removed, and
            ``User-Agent`` is always set.

        :type cb: function
        :param cb: a callback function that will be called to report progress on
            the upload.  The callback should accept two integer parameters, the
            first representing the number of bytes that have been successfully
            transmitted to GS, and the second representing the total number of
            bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter, this parameter determines the granularity of the callback
            by defining the maximum number of times the callback will be called
            during the file transfer. Providing a negative integer will cause
            your callback to be called with each buffer read.

        Raises ResumableUploadException if a problem occurs during the transfer.
        """

        if not headers:
            headers = {}
        # If Content-Type header is present and set to None, remove it.
        # This is gsutil's way of asking boto to refrain from auto-generating
        # that header.
        CT = 'Content-Type'
        if CT in headers and headers[CT] is None:
          del headers[CT]

        headers['User-Agent'] = UserAgent

        # Determine file size different ways for case where fp is actually a
        # wrapper around a Key vs an actual file.
        if isinstance(fp, KeyFile):
          file_length = fp.getkey().size
        else:
          fp.seek(0, os.SEEK_END)
          file_length = fp.tell()
          fp.seek(0)
        debug = key.bucket.connection.debug

        # Compute the MD5 checksum on the fly.
        self.md5sum = md5()

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 6.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 6)
        self.progress_less_iterations = 0

        # NOTE(review): in the code visible here nothing after the except
        # clauses reads num_retries, progress_less_iterations or
        # server_had_bytes_before_attempt, so the loop only ends on success
        # or when an exception propagates.  The snippet may be truncated --
        # confirm against the full source.
        while True:  # Retry as long as we're making progress.
            server_had_bytes_before_attempt = self.server_has_bytes
            # Snapshot of the running digest taken before this attempt.
            self.md5sum_before_attempt = self.md5sum.copy()
            try:
                # Save generation and meta_generation in class state so caller
                # can find these values, for use in preconditions of future
                # operations on the uploaded object.
                (etag, self.generation, self.meta_generation) = (
                    self._attempt_resumable_upload(key, fp, file_length,
                                                   headers, cb, num_cb,
                                                   self.md5sum))

                # Get the final md5 for the uploaded content.
                hd = self.md5sum.hexdigest()
                key.md5, key.base64md5 = key.get_md5_from_hexdigest(hd)

                # Upload succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                self._check_final_md5(key, etag)
                if debug >= 1:
                    print 'Resumable upload complete.'
                return
            except self.RETRYABLE_EXCEPTIONS, e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # Broken pipe error causes httplib to immediately
                    # close the socket (http://bugs.python.org/issue5542),
                    # so we need to close the connection before we resume
                    # the upload (which will cause a new connection to be
                    # opened the next time an HTTP request is sent).
                    key.bucket.connection.connection.close()
            except ResumableUploadException, e:
                # The abort-or-retry decision for this exception is made in
                # the helper.
                self.handle_resumable_upload_exception(e, debug)
    def send_file(self, key, fp, headers, cb=None, num_cb=10):
        """
        Send the contents of a file-like object to a key in a GS bucket
        through the GS resumable upload protocol, making repeated upload
        attempts until one succeeds or an exception propagates out.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: Key object that receives the uploaded data

        :type fp: file-like object
        :param fp: Source of the bytes to upload

        :type headers: dict
        :param headers: Headers to pass along with the PUT request. A
            'Content-Type' entry whose value is None is deleted from this
            dict before the first attempt.

        :type cb: function
        :param cb: Progress callback. It should accept two integers: the
            number of bytes successfully transmitted to GS so far and the
            total number of bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) When cb is given, the maximum number of
            times it will be called during the transfer. A negative value
            causes the callback to be called with each buffer read.

        Raises ResumableUploadException if a problem occurs during the transfer.
        """

        headers = headers or {}
        # A Content-Type that is present but set to None is gsutil's way of
        # asking boto not to auto-generate that header, so drop the entry.
        content_type = 'Content-Type'
        if content_type in headers:
            if headers[content_type] is None:
                del headers[content_type]

        # fp may be a wrapper around a Key rather than an actual file; the
        # size is obtained differently in each case.
        if not isinstance(fp, KeyFile):
            fp.seek(0, os.SEEK_END)
            file_length = fp.tell()
            fp.seek(0)
        else:
            file_length = fp.getkey().size
        debug = key.bucket.connection.debug

        # The MD5 checksum is accumulated while the data is uploaded.
        self.md5sum = md5()

        # A num_retries supplied to the constructor wins; otherwise take the
        # value from the boto config file, falling back to 6.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 6)
        self.progress_less_iterations = 0

        # Each pass is one upload attempt. The loop is left either by the
        # return on success or by an exception propagating out.
        while True:
            bytes_on_server_before = self.server_has_bytes
            self.md5sum_before_attempt = self.md5sum.copy()
            try:
                etag = self._attempt_resumable_upload(
                    key, fp, file_length, headers, cb, num_cb, self.md5sum)

                # Record the final digest of the uploaded content on the key.
                hex_digest = self.md5sum.hexdigest()
                key.md5, key.base64md5 = key.get_md5_from_hexdigest(hex_digest)

                # The upload went through, so the tracker file (if there is
                # one) is no longer needed.
                self._remove_tracker_file()
                self._check_final_md5(key, etag)
                if debug >= 1:
                    print('Resumable upload complete.')
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                broken_pipe = isinstance(e, IOError) and e.errno == errno.EPIPE
                if broken_pipe:
                    # A broken pipe makes httplib close the socket right away
                    # (http://bugs.python.org/issue5542). Close the connection
                    # here so that a new one is opened the next time an HTTP
                    # request is sent.
                    key.bucket.connection.connection.close()
            except ResumableUploadException as e:
                self.handle_resumable_upload_exception(e, debug)

            self.track_progress_less_iterations(
                bytes_on_server_before, True, debug)
    def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
                 version_id=None):
        """
        Retrieves a file from a Key, re-attempting the resumable download
        after retryable failures for as long as attempts keep making progress.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object from which the data is to be downloaded

        :type fp: file
        :param fp: File pointer into which data should be downloaded

        :type headers: dict
        :param headers: headers to send when retrieving the files; a falsy
            value is replaced by an empty dict

        :type cb: function
        :param cb: (optional) a callback function that will be called to report
             progress on the download.  The callback should accept two integer
             parameters, the first representing the number of bytes that have
             been successfully transmitted from the storage service and
             the second representing the total number of bytes that need
             to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
             parameter this parameter determines the granularity of the callback
             by defining the maximum number of times the callback will be
             called during the file transfer.

        :type torrent: bool
        :param torrent: Flag for whether to get a torrent for the file

        :type version_id: string
        :param version_id: The version ID (optional)

        Raises ResumableDownloadException if a problem occurs during
            the transfer: exceptions with disposition ABORT_CUR_PROCESS or
            ABORT are re-raised, and a new one with disposition
            ABORT_CUR_PROCESS is raised once more than num_retries
            consecutive attempts fail without increasing the size reported
            by get_cur_file_size(fp).
        """
        # Local import: only needed for the back-off between attempts.
        import time

        debug = key.bucket.connection.debug
        if not headers:
            headers = {}

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 5.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 5)
        progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            had_file_bytes_before_attempt = get_cur_file_size(fp)
            try:
                self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                                 torrent, version_id)
                # Download succeeded, so remove the tracker file (if have one).
                self._remove_tracker_file()
                self._check_final_md5(key, fp.name)
                if debug >= 1:
                    print('Resumable download complete.')
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
            except ResumableDownloadException as e:
                if (e.disposition ==
                        ResumableTransferDisposition.ABORT_CUR_PROCESS):
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s)' % e.message)
                    raise
                elif (e.disposition ==
                        ResumableTransferDisposition.ABORT):
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s); aborting and removing tracker file' %
                              e.message)
                    self._remove_tracker_file()
                    raise
                else:
                    if debug >= 1:
                        print('Caught ResumableDownloadException (%s) - will '
                              'retry' % e.message)

            # Only attempts that failed in a retryable way reach this point.
            # An attempt counts as progress when the local file grew.
            if get_cur_file_size(fp) > had_file_bytes_before_attempt:
                progress_less_iterations = 0
            else:
                progress_less_iterations += 1

            # Without this bound a persistent failure would spin forever.
            if progress_less_iterations > self.num_retries:
                # Don't retry any longer in the current process.
                raise ResumableDownloadException(
                    'Too many resumable download attempts failed without '
                    'progress. You might try this download again later',
                    ResumableTransferDisposition.ABORT_CUR_PROCESS)

            # Exponential back-off so a struggling service is not hammered.
            sleep_time_secs = 2 ** progress_less_iterations
            if debug >= 1:
                print('Got retryable failure (%d progress-less in a row).\n'
                      'Sleeping %d seconds before re-trying' %
                      (progress_less_iterations, sleep_time_secs))
            time.sleep(sleep_time_secs)
Example #48
0
def GetMaxRetryDelay():
  """Returns the integer 'max_retry_delay' option of the [Boto] config section.

  The fallback passed to the config lookup is 32.
  """
  max_retry_delay = config.getint('Boto', 'max_retry_delay', 32)
  return max_retry_delay
Example #49
0
    def __init__(self, host, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, path='/', provider='aws'):
        """
        Initialise the connection settings: security/protocol, proxy,
        retryable exception types, host/port/path, debug level, optional
        socket timeout, provider credentials, connection pool and auth
        handler.

        :type host: str
        :param host: The host to make the connection to

        :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
            Amazon). If none is specified, the value in your
            ``AWS_ACCESS_KEY_ID`` environmental variable is used.
        :keyword str aws_secret_access_key: Your AWS Secret Access Key
            (provided by Amazon). If none is specified, the value in your
            ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL. An
            ``is_secure`` option in the ``Boto`` config section overrides
            the value passed in.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
                                         factory and the exceptions to catch.
                                         The factory should have a similar
                                         interface to L{httplib.HTTPSConnection}.

        :param str proxy: Address/hostname for a proxy server

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: str
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: str
        :param proxy_pass: The password to use when connection over a proxy.

        :type port: int
        :param port: The port to use to connect. A falsy value selects the
            entry of PORTS_BY_SECURITY for the effective is_secure setting.

        :type debug: int
        :param debug: Debug level. A falsy value is replaced by the ``debug``
            option of the ``Boto`` config section.

        :type path: str
        :param path: Stored as ``self.path``

        :type provider: str
        :param provider: Name handed to :class:`Provider` together with the
            two access keys
        """
        self.num_retries = 5
        # A config-file is_secure setting takes precedence over the argument.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # httplib exceptions that we want to catch and retry
        self.http_exceptions = (httplib.HTTPException, socket.error,
                                socket.gaierror)
        # values in socket exceptions that we don't want to catch
        self.socket_exception_values = (errno.EINTR,)
        if https_connection_factory is None:
            self.https_connection_factory = None
        else:
            # Element 0 is the factory; element 1 holds the additional
            # exception types to treat as retryable.
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]
        self.protocol = 'https' if is_secure else 'http'
        self.host = host
        self.path = path
        self.debug = debug or config.getint('Boto', 'debug', debug)
        self.port = port or PORTS_BY_SECURITY[is_secure]

        # Keyword arguments for the HTTP connection. By default the timeout
        # is left unchanged, so the socket's default global timeout applies.
        # Setting http_socket_timeout in the Boto config specifies one, but
        # it is only applied when running on Python 2.6 or greater.
        self.http_connection_kwargs = {}
        if (sys.version_info[:2] >= (2, 6) and
                config.has_option('Boto', 'http_socket_timeout')):
            self.http_connection_kwargs['timeout'] = config.getint(
                'Boto', 'http_socket_timeout')

        self.provider = Provider(
            provider, aws_access_key_id, aws_secret_access_key)

        # A host defined by the provider replaces the one passed in.
        if self.provider.host:
            self.host = self.provider.host

        # cache up to 20 connections per host, up to 20 hosts
        self._pool = ConnectionPool(20, 20)
        self._connection = (self.server_name(), self.is_secure)
        self._last_rs = None
        required_capability = self._required_auth_capability()
        self._auth_handler = auth.get_auth_handler(
            host, config, self.provider, required_capability)
Example #50
0
def ResumableThreshold():
  """Returns the integer 'resumable_threshold' option of the [GSUtil] section.

  The fallback passed to the config lookup is 8 * ONE_MIB.
  """
  fallback = 8 * ONE_MIB
  return config.getint('GSUtil', 'resumable_threshold', fallback)
Example #51
0
    def _mexe(self, method, path, data, headers, host=None, sender=None,
              override_num_retries=None):
        """
        mexe - Multi-execute inside a loop, retrying multiple times to handle
               transient Internet errors by simply trying again.
               Also handles redirects.

        This code was inspired by the S3Utils classes posted to the boto-users
        Google group by Larry Bates.  Thanks!

        :param method: HTTP method passed to the request
        :param path: Request path passed to the request
        :param data: Request body passed to the request
        :param headers: Headers passed to the request
        :param host: Host used to obtain (and return) pooled connections
        :param sender: Optional callable invoked as
            ``sender(connection, method, path, data, headers)`` in place of
            ``connection.request`` / ``connection.getresponse``; its return
            value is used as the response
        :param override_num_retries: When not None, used as the retry count
            instead of the ``num_retries`` config option (whose fallback is
            ``self.num_retries``)

        :return: the response object, when its status is below 300, is 400
            or above (other than 408, 500 and 503), or when it carries no
            ``location`` header

        Raises BotoServerError when the retries are exhausted and a response
        was received; otherwise re-raises the last caught HTTP exception; if
        neither exists, raises BotoClientError. A KeyboardInterrupt during a
        request is turned into ``sys.exit``.
        """
        boto.log.debug('Method: %s' % method)
        boto.log.debug('Path: %s' % path)
        boto.log.debug('Data: %s' % data)
        boto.log.debug('Headers: %s' % headers)
        boto.log.debug('Host: %s' % host)
        response = None
        body = None
        # The most recent HTTP exception is kept under its own name: on
        # Python 3 the "as" target of an except clause is unbound when the
        # clause ends, so it cannot be read after the loop.
        last_exception = None
        if override_num_retries is None:
            num_retries = config.getint('Boto', 'num_retries', self.num_retries)
        else:
            num_retries = override_num_retries
        i = 0
        connection = self.get_http_connection(host, self.is_secure)
        while i <= num_retries:
            try:
                if hasattr(sender, '__call__'):
                    response = sender(connection, method, path, data, headers)
                else:
                    connection.request(method, path, data, headers)
                    response = connection.getresponse()
                location = response.getheader('location')
                # -- gross hack --
                # httplib gets confused with chunked responses to HEAD requests
                # so I have to fake it out
                if method == 'HEAD' and getattr(response, 'chunked', False):
                    response.chunked = 0
                if response.status == 500 or response.status == 503:
                    boto.log.debug('received %d response, retrying in %d seconds' % (response.status, 2 ** i))
                    body = response.read()
                elif response.status == 408:
                    body = response.read()
                    print('-------------------------')
                    print('         4 0 8           ')
                    print('path=%s' % path)
                    print(body)
                    print('-------------------------')
                elif response.status < 300 or response.status >= 400 or \
                        not location:
                    self.put_http_connection(host, self.is_secure, connection)
                    return response
                else:
                    # 3xx with a location header: follow the redirect.
                    # NOTE(review): redirects do not increment the attempt
                    # counter, so a redirect cycle is not bounded here.
                    scheme, host, path, params, query, fragment = \
                        urlparse.urlparse(location)
                    if query:
                        path += '?' + query
                    boto.log.debug('Redirecting: %s' %
                                   (scheme + '://' + host + path))
                    connection = self.get_http_connection(host, scheme == 'https')
                    continue
            except KeyboardInterrupt:
                sys.exit('Keyboard Interrupt')
            except self.http_exceptions as e:
                last_exception = e
                boto.log.debug('encountered %s exception, reconnecting' %
                               e.__class__.__name__)
                connection = self.new_http_connection(host, self.is_secure)
            # Exponential back-off before the next attempt.
            time.sleep(2 ** i)
            i += 1
        # If we made it here, it's because we have exhausted our retries and still haven't
        # succeeded.  So, if we have a response object, use it to raise an exception.
        # Otherwise, raise the exception that must have already happened.
        if response:
            raise BotoServerError(response.status, response.reason, body)
        elif last_exception is not None:
            raise last_exception
        else:
            raise BotoClientError('Please report this exception as a Boto Issue!')