def get_s3_connection(self):
    """Return a cached boto S3 connection, creating it on first use.

    When ``self.s3_url`` is set, connect to that endpoint (e.g. a
    Eucalyptus/Walrus install), choosing the path-style calling format
    for non-Amazon hosts.  Otherwise defer to the parent class, which
    connects to Amazon S3.
    """
    log.debug('Getting boto S3 connection')
    if self.s3_conn is None:
        log.debug("No S3 Connection, creating a new one.")
        if self.s3_url:
            url = urlparse(self.s3_url)
            host = url.hostname
            port = url.port
            path = url.path
            calling_format = SubdomainCallingFormat()
            if host.find('amazon') == -1:
                # assume that non-amazon won't use <bucket>.<hostname> format
                calling_format = OrdinaryCallingFormat()
            if url.scheme == 'https':
                is_secure = True
            else:
                is_secure = False
            try:
                self.s3_conn = S3Connection(
                    aws_access_key_id=self.aws_access_key,
                    aws_secret_access_key=self.aws_secret_key,
                    is_secure=is_secure,
                    port=port,
                    host=host,
                    path=path,
                    calling_format=calling_format,
                    # debug = 2
                )
                log.debug('Got boto S3 connection to %s' % self.s3_url)
            except Exception as e:  # fixed Py2-only "except Exception, e" syntax
                log.error("Exception getting S3 connection: %s" % e)
        else:  # default to Amazon connection
            super(EucaInterface, self).get_s3_connection()
def __init__(self, auth, credentials, settings):
    """
    .. note::

        Neither `S3Connection#__init__` nor `S3Connection#get_bucket`
        sends a request.

    :param dict auth: Not used
    :param dict credentials: Dict containing `access_key` and `secret_key`
    :param dict settings: Dict containing `bucket`
    """
    super().__init__(auth, credentials, settings)
    bucket_name = settings['bucket']
    # Buckets with capital letters in the name MUST use the ordinary
    # calling format; buckets outside the US MUST use the subdomain
    # calling format.
    has_upper_case = bucket_name != bucket_name.lower()
    calling_format = (OrdinaryCallingFormat() if has_upper_case
                      else SubdomainCallingFormat())
    self.connection = S3Connection(
        credentials['access_key'],
        credentials['secret_key'],
        calling_format=calling_format,
    )
    self.bucket = self.connection.get_bucket(bucket_name, validate=False)
    self.encrypt_uploads = self.settings.get('encrypt_uploads', False)
def _get_s3_conn(ud):
    """Create a boto S3 connection from the user-data dict ``ud``.

    ``ud`` must contain ``access_key`` and ``secret_key``; an optional
    ``s3_url`` overrides the S3 endpoint (e.g. for Eucalyptus).

    :return: the :class:`S3Connection`, or ``None`` when no ``s3_url``
        was supplied or the connection attempt failed.
    """
    access_key = ud['access_key']
    secret_key = ud['secret_key']
    s3_conn = None
    log.debug('Establishing boto S3 connection')
    # dict.has_key() is Py2-only and long deprecated; `in` works everywhere.
    if 's3_url' in ud:
        # override the S3 host to e.g. Eucalyptus
        url = urlparse(ud['s3_url'])
        host = url.hostname
        port = url.port
        path = url.path
        calling_format = SubdomainCallingFormat()
        if host.find('amazon') == -1:
            # assume that non-amazon won't use <bucket>.<hostname> format
            calling_format = OrdinaryCallingFormat()
        is_secure = url.scheme == 'https'
        try:
            s3_conn = S3Connection(
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
                is_secure=is_secure,
                port=port,
                host=host,
                path=path,
                calling_format=calling_format,
            )
            log.debug('Got boto S3 connection to %s' % ud['s3_url'])
        except Exception as e:  # fixed Py2-only "except Exception, e" syntax
            log.error("Exception getting S3 connection: %s" % e)
    # The original built the connection and dropped it; return it so
    # callers can use it (None on failure or missing s3_url).
    return s3_conn
def s3_endpoint_for_uri(s3_uri, c_args=None, c_kwargs=None, connection=None):
    """Resolve (and memoize) the regional S3 endpoint for an s3:// URI.

    'connection' argument is used for unit test dependency injection.
    Works around boto/443 (https://github.com/boto/boto/issues/443).
    Falls back to the classic ``s3.amazonaws.com`` endpoint when the
    region lookup fails.
    """
    bucket_name = urlparse(s3_uri).netloc
    default = 's3.amazonaws.com'
    if bucket_name not in s3_endpoint_for_uri.cache:
        try:
            # Attempting to use .get_bucket() with OrdinaryCallingFormat raises
            # a S3ResponseError (status 301). See boto/443 referenced above.
            c_args = c_args or ()
            c_kwargs = c_kwargs or {}
            c_kwargs['calling_format'] = SubdomainCallingFormat()
            conn = connection or S3Connection(*c_args, **c_kwargs)
            s3_endpoint_for_uri.cache[bucket_name] = _S3_REGIONS.get(
                conn.get_bucket(bucket_name).get_location(), default)
        except Exception:
            # StandardError was removed in Python 3; Exception covers the
            # same failures on both 2.x and 3.x.
            detail = ''.join(traceback.format_exception(*sys.exc_info()))
            hint = ('Bucket names containing upper case letters'
                    ' are known to be problematic.')
            logger.warning('Problem getting region information for bucket %s.',
                           bucket_name, hint=hint, detail=detail)
            s3_endpoint_for_uri.cache[bucket_name] = default
    return s3_endpoint_for_uri.cache[bucket_name]
def connect_to_s3():
    """Build an S3Connection from Django settings.

    Optional settings: ``AWS_PORT``, ``AWS_HOST``, and ``AWS_FAKES3``
    (when truthy, switch to an insecure connection with path-style
    bucket addressing).  Required: ``AWS_ACCESS_KEY`` and
    ``AWS_SECRET_KEY``.
    """
    # getattr with a default replaces the original bare "try/except: pass"
    # blocks, which silently swallowed *every* exception (including typos
    # and interrupts), not just a missing setting.
    port = getattr(settings, 'AWS_PORT', None)
    host = getattr(settings, 'AWS_HOST', S3Connection.DefaultHost)
    is_secure = True
    calling_format = SubdomainCallingFormat()
    if getattr(settings, 'AWS_FAKES3', False):
        is_secure = False
        calling_format = OrdinaryCallingFormat()
    return S3Connection(aws_access_key_id=settings.AWS_ACCESS_KEY,
                        aws_secret_access_key=settings.AWS_SECRET_KEY,
                        port=port,
                        host=host,
                        calling_format=calling_format,
                        is_secure=is_secure,
                        debug=0)
def __init__(self, gs_access_key_id=None, gs_secret_access_key=None,
             is_secure=True, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None,
             host=DefaultHost, debug=0, https_connection_factory=None,
             calling_format=SubdomainCallingFormat(), path='/',
             suppress_consec_slashes=True):
    """Connect to Google Cloud Storage through boto's S3 machinery.

    Delegates to ``S3Connection.__init__`` with provider "google" and
    the GS-specific ``Bucket`` class.  The positional argument order
    below must match S3Connection's signature exactly — do not reorder.

    NOTE(review): the ``calling_format`` default is a single shared
    instance (evaluated once at def time); boto calling formats appear
    stateless, so this looks safe — confirm before changing.
    """
    S3Connection.__init__(self, gs_access_key_id, gs_secret_access_key,
                          is_secure, port, proxy, proxy_port, proxy_user,
                          proxy_pass, host, debug,
                          https_connection_factory, calling_format, path,
                          "google", Bucket,
                          suppress_consec_slashes=suppress_consec_slashes)
def __init__(self, region, access_key, secret_key, bucket_name,
             secure=True, num_retries=5, socket_timeout=15):
    """Record connection parameters, reset boto's global config, connect."""
    self.region = region
    self.access_key = access_key
    self.secret_key = secret_key
    self.secure = secure
    self.num_retries = num_retries
    self.socket_timeout = socket_timeout
    # monkey patch for bucket_name with dots
    # https://github.com/boto/boto/issues/2836
    needs_ordinary = self.secure and '.' in bucket_name
    self.calling_format = (OrdinaryCallingFormat() if needs_ordinary
                           else SubdomainCallingFormat())
    # Wipe whatever boto picked up from config files, then set only the
    # options this object controls.
    for section_name in boto.config.sections():
        boto.config.remove_section(section_name)
    boto.config.add_section('Boto')
    boto.config.setbool('Boto', 'is_secure', self.secure)
    boto.config.set('Boto', 'http_socket_timeout', str(self.socket_timeout))
    boto.config.set('Boto', 'num_retries', str(self.num_retries))
    self._conn = None
    self.connect()
def __init__(self):
    """Anonymous (unauthenticated) Google Storage connection."""
    super(GSConnection, self).__init__(
        # This is the important bit we need to add...
        anon=True,
        # ...and these are just copied in from GSConnection.__init__()
        bucket_class=Bucket,
        calling_format=SubdomainCallingFormat(),
        host=GSConnection.DefaultHost,
        provider='google')
def s3_connection(self):
    """
    Connect to the Amazon S3 API.

    If the connection attempt fails because Boto can't find credentials
    the attempt is retried once with an anonymous connection.

    Called on demand by :attr:`s3_bucket`.

    :returns: A :class:`boto.s3.connection.S3Connection` object.
    :raises: :exc:`.CacheBackendError` when the connection to the Amazon
             S3 API fails.
    """
    if not hasattr(self, 'cached_connection'):
        self.check_prerequisites()
        # Deferred import inside the patched-config context keeps boto
        # optional until the cache backend is actually used.
        with PatchedBotoConfig():
            import boto
            from boto.exception import BotoClientError, BotoServerError, NoAuthHandlerFound
            from boto.s3.connection import S3Connection, SubdomainCallingFormat, OrdinaryCallingFormat
            try:
                # Configure the number of retries and the socket timeout used
                # by Boto. Based on the snippet given in the following email:
                # https://groups.google.com/d/msg/boto-users/0osmP0cUl5Y/X4NdlMGWKiEJ
                if not boto.config.has_section(BOTO_CONFIG_SECTION):
                    boto.config.add_section(BOTO_CONFIG_SECTION)
                boto.config.set(BOTO_CONFIG_SECTION,
                                BOTO_CONFIG_NUM_RETRIES_OPTION,
                                str(self.config.s3_cache_retries))
                boto.config.set(BOTO_CONFIG_SECTION,
                                BOTO_CONFIG_SOCKET_TIMEOUT_OPTION,
                                str(self.config.s3_cache_timeout))
                logger.debug("Connecting to Amazon S3 API ..")
                endpoint = urlparse(self.config.s3_cache_url)
                host, _, port = endpoint.netloc.partition(':')
                # One keyword set shared by the normal and anonymous attempts.
                kw = dict(
                    host=host,
                    port=int(port) if port else None,
                    is_secure=(endpoint.scheme == 'https'),
                    calling_format=(SubdomainCallingFormat()
                                    if host == S3Connection.DefaultHost
                                    else OrdinaryCallingFormat()),
                )
                try:
                    self.cached_connection = S3Connection(**kw)
                except NoAuthHandlerFound:
                    logger.debug(
                        "Amazon S3 API credentials missing, retrying with anonymous connection .."
                    )
                    self.cached_connection = S3Connection(anon=True, **kw)
            except (BotoClientError, BotoServerError):
                raise CacheBackendError("""
                    Failed to connect to the Amazon S3 API! Most likely your
                    credentials are not correctly configured. The Amazon S3
                    cache backend will be disabled for now.
                """)
    return self.cached_connection
def test_get_bucket_vs_certs():
    """Integration test for bucket naming issues."""
    import boto.s3.connection
    # Add dots to try to trip up TLS certificate validation: with the
    # subdomain calling format a dotted bucket name is expected to fail
    # hostname verification (hence the expected exception below).
    bucket_name = bucket_name_mangle('wal-e.test.dots', delimiter='.')
    with pytest.raises(boto.https_connection.InvalidCertificateException):
        with FreshBucket(bucket_name,
                         calling_format=SubdomainCallingFormat()):
            pass
def test_get_bucket_vs_certs():
    """Integration test for bucket naming issues."""
    import boto.s3.connection
    # NOTE(review): assumes AWS_ACCESS_KEY_ID is set; when it is not,
    # .lower() below raises AttributeError on None — confirm the test
    # suite guarantees the env var (or add a skip).
    aws_access_key = os.getenv('AWS_ACCESS_KEY_ID')
    # Add dots to try to trip up TLS certificate validation.
    bucket_name = 'wal-e.test.dots.' + aws_access_key.lower()
    with pytest.raises(boto.https_connection.InvalidCertificateException):
        with FreshBucket(bucket_name,
                         calling_format=SubdomainCallingFormat()):
            pass
def _get_bucket_obj(self):
    """Fetch the bucket, trying both S3 calling formats.

    Starts with the ordinary (path-style) format; when S3 answers
    "Moved Permanently" (HTTP 301), retries with the subdomain format.
    Records the server name used on ``self.host``.

    :return: the boto bucket, or ``None`` when the first attempt fails
        with any other ``S3ResponseError`` reason.
    """
    # try both calling formats
    try:
        s3 = self._auth_s3(calling_format=OrdinaryCallingFormat())
        bucket = s3.get_bucket(self.bucket_name)
        self.host = s3.server_name()
        return bucket
    except S3ResponseError as e:  # fixed Py2-only "except S3ResponseError, e"
        if e.reason == 'Moved Permanently':
            s3 = self._auth_s3(calling_format=SubdomainCallingFormat())
            bucket = s3.get_bucket(self.bucket_name)
            self.host = s3.server_name()
            return bucket
def _get_s3connection(ud):
    """Build a boto S3 connection from the ``ud`` user-data dictionary.

    EC2/Eucalyptus clouds derive the endpoint from ``s3_url``
    (defaulting to Amazon S3); any other cloud type must supply the
    pre-parsed ``s3_host``/``s3_port``/``is_secure``/``s3_conn_path``
    keys.  Returns the connection, or ``None`` when it could not be
    established.
    """
    access_key = ud['access_key']
    secret_key = ud['secret_key']
    s3_url = ud.get('s3_url', AMAZON_S3_URL)
    cloud_type = ud.get('cloud_type', 'ec2')
    if cloud_type in ['ec2', 'eucalyptus']:
        if s3_url == AMAZON_S3_URL:
            log.info('connecting to Amazon S3 at {0}'.format(s3_url))
        else:
            log.info('connecting to custom S3 url: {0}'.format(s3_url))
        url = urlparse.urlparse(s3_url)
        is_secure = (url.scheme == 'https')
        host, port, path = url.hostname, url.port, url.path
        # TODO fix if anyone other than Amazon uses subdomains for buckets
        calling_format = (SubdomainCallingFormat() if 'amazonaws' in host
                          else OrdinaryCallingFormat())
    else:
        # submitted pre-parsed S3 URL: the user specified an alternate S3
        # host (such as swift), so connect straight from their user data
        log.info("Connecting to a custom Object Store")
        is_secure = ud['is_secure']
        host = ud['s3_host']
        port = ud['s3_port']
        calling_format = OrdinaryCallingFormat()
        path = ud['s3_conn_path']
    # get boto connection
    s3_conn = None
    try:
        s3_conn = S3Connection(
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            is_secure=is_secure,
            port=port,
            host=host,
            path=path,
            calling_format=calling_format,
        )
        log.debug('Got boto S3 connection: %s' % s3_conn)
    except BotoServerError as e:
        log.error("Exception getting S3 connection; {0}".format(e))
    return s3_conn
def get_s3_bucket(bucket_url, autocreate=True):
    """
    (http[s]|s3)://key:secret@[host[:port]/]bucketprefix
    """
    url = urlparse(bucket_url)
    if url.scheme not in ('http', 'https', 's3'):
        raise S3ConnectionError(
            'Unsupported scheme "{}" in S3 bucket URL'.format(url.scheme))
    is_secure = url.scheme in ('https', 's3')
    try:
        access_key, secret_key = url.username, url.password
        api_endpoint, port = url.hostname, url.port
        bucket_name = url.path
        # ignore leading '/' when host was specified
        if bucket_name.startswith('/'):
            bucket_name = bucket_name[1:]
    except ValueError:
        raise S3ConnectionError('Unable to parse the S3 bucket URL.')
    if not (access_key and secret_key):
        raise S3ConnectionError(
            'No cloud access and secret keys were provided.'
            ' Unable to establish a connection to S3.')
    if not bucket_name:
        raise S3ConnectionError('No bucket name was provided.'
                                ' Unable to establish a connection to S3.')
    # assume that non-amazon won't use <bucket>.<hostname> format
    if api_endpoint and api_endpoint.find('amazon') == -1:
        calling_format = OrdinaryCallingFormat()
    else:
        calling_format = SubdomainCallingFormat()
    s3_conn = boto.connect_s3(aws_access_key_id=access_key,
                              aws_secret_access_key=secret_key,
                              is_secure=is_secure,
                              port=port,
                              host=api_endpoint,
                              calling_format=calling_format)
    s3_bucket = s3_conn.lookup(bucket_name)
    if s3_bucket is None and autocreate:
        s3_bucket = s3_conn.create_bucket(bucket_name)
    return s3_bucket
def s3_connection(self):
    """
    Connect to the Amazon S3 API, retrying once with an anonymous
    connection when Boto cannot find credentials.

    Called on demand by :py:attr:`s3_bucket`.

    :returns: A :py:class:`boto.s3.connection.S3Connection` object.
    :raises: :py:exc:`.CacheBackendError` when the connection to the
             Amazon S3 API fails.
    """
    if not hasattr(self, 'cached_connection'):
        from boto.exception import BotoClientError, BotoServerError, NoAuthHandlerFound
        from boto.s3.connection import S3Connection, SubdomainCallingFormat, OrdinaryCallingFormat
        try:
            logger.debug("Connecting to Amazon S3 API ..")
            endpoint = urlparse(self.config.s3_cache_url)
            host, _, port = endpoint.netloc.partition(':')
            # One keyword set shared by both connection attempts.
            kw = dict(
                host=host,
                port=int(port) if port else None,
                is_secure=(endpoint.scheme == 'https'),
                calling_format=(SubdomainCallingFormat()
                                if host == S3Connection.DefaultHost
                                else OrdinaryCallingFormat()),
            )
            try:
                self.cached_connection = S3Connection(**kw)
            except NoAuthHandlerFound:
                logger.debug(
                    "Amazon S3 API credentials missing, retrying with anonymous connection .."
                )
                self.cached_connection = S3Connection(anon=True, **kw)
        except (BotoClientError, BotoServerError):
            raise CacheBackendError("""
                Failed to connect to the Amazon S3 API! Most likely your
                credentials are not correctly configured. The Amazon S3
                cache backend will be disabled for now.
            """)
    return self.cached_connection
def __init__(self):
    """Configure the S3 media storage backend from Django settings."""
    bucket_name = settings.AWS_MEDIA_STORAGE_BUCKET_NAME
    # Dotted bucket names are known to be problematic with subdomain
    # addressing, so fall back to the path-style calling format for them.
    calling_format = (OrdinaryCallingFormat() if '.' in bucket_name
                      else SubdomainCallingFormat())
    # We cannot use a function call or a partial here. Instead, we have to
    # create a subclass because django tries to recreate a new object by
    # calling the __init__ of the returned object (with no arguments).
    super(S3MediaStorage, self).__init__(
        access_key=settings.AWS_MEDIA_ACCESS_KEY_ID,
        secret_key=settings.AWS_MEDIA_SECRET_ACCESS_KEY,
        bucket_name=bucket_name,
        location=settings.AWS_MEDIA_BUCKET_PREFIX,
        host=settings.AWS_MEDIA_STORAGE_HOST,
        custom_domain=settings.AWS_MEDIA_DOMAIN,
        calling_format=calling_format,
        # Setting an ACL requires us to grant the user the PutObjectAcl
        # permission as well, even if it matches the default bucket ACL.
        # XXX: Ideally we would thus set it to `None`, but due to how
        # easy_thumbnails works internally, that causes thumbnail
        # generation to fail...
        default_acl='public-read',
        querystring_auth=False,
    )
    # MEDIA_HEADERS is a list of (path-regex, headers-dict) pairs; the
    # first pattern that matches a path decides which extra HTTP headers
    # are returned with that resource. Headers are applied in declaration
    # order and processing stops at the first match. E.g.:
    #
    # MEDIA_HEADERS = [
    #     (r'media/cache/.*', {
    #         'Cache-Control': 'max-age={}'.format(3600 * 24 * 365),
    #     })
    # ]
    #
    media_headers = getattr(settings, 'MEDIA_HEADERS', [])
    self.media_headers = [(re.compile(pattern), headers)
                          for pattern, headers in media_headers]
def connect_s3(self, a_key, s_key, cloud=None):
    """
    Create and return an S3-compatible connection object for the given
    cloud.

    See ``_get_cloud_info`` method for more details on the requirements
    for the ``cloud`` parameter. If no value is provided, the class
    field is used.
    """
    if cloud is None:
        cloud = self.cloud
    ci = self._get_cloud_info(cloud)
    # Amazon supports <bucket>.<host> addressing; other clouds get the
    # path-style format.
    if ci['cloud_type'] == 'amazon':
        fmt = SubdomainCallingFormat()
    else:
        fmt = OrdinaryCallingFormat()
    return S3Connection(
        aws_access_key_id=a_key,
        aws_secret_access_key=s_key,
        is_secure=ci['is_secure'],
        port=ci['s3_port'],
        host=ci['s3_host'],
        path=ci['s3_conn_path'],
        calling_format=fmt)
def _get_s3connection(ud):
    """Return a boto S3Connection for the cloud described by ``ud``.

    ``ud`` carries the credentials plus either an ``s3_url`` (EC2 /
    Eucalyptus) or explicit host/port/path fields (custom object
    stores).  ``None`` is returned when connecting fails.
    """
    access_key = ud['access_key']
    secret_key = ud['secret_key']
    s3_url = ud.get('s3_url', AMAZON_S3_URL)
    cloud_type = ud.get('cloud_type', 'ec2')
    if cloud_type in ['ec2', 'eucalyptus']:
        if s3_url == AMAZON_S3_URL:
            log.info('connecting to Amazon S3 at {0}'.format(s3_url))
        else:
            log.info('connecting to custom S3 url: {0}'.format(s3_url))
        url = urlparse.urlparse(s3_url)
        is_secure = url.scheme == 'https'
        host = url.hostname
        port = url.port
        path = url.path
        if 'amazonaws' in host:
            calling_format = SubdomainCallingFormat()
        else:
            calling_format = OrdinaryCallingFormat()
    else:
        # Pre-parsed connection details for a non-EC2 object store.
        log.info('Connecting to a custom Object Store')
        is_secure = ud['is_secure']
        host = ud['s3_host']
        port = ud['s3_port']
        calling_format = OrdinaryCallingFormat()
        path = ud['s3_conn_path']
    s3_conn = None
    try:
        s3_conn = S3Connection(aws_access_key_id=access_key,
                               aws_secret_access_key=secret_key,
                               is_secure=is_secure,
                               port=port,
                               host=host,
                               path=path,
                               calling_format=calling_format)
        log.debug(('Got boto S3 connection: %s' % s3_conn))
    except BotoServerError as e:
        log.error('Exception getting S3 connection; {0}'.format(e))
    return s3_conn
class S3BotoStorage(Storage):
    """
    Amazon Simple Storage Service using Boto

    This storage backend supports opening files in read or write
    mode and supports streaming(buffering) data in chunks to S3
    when writing.
    """
    connection_class = S3Connection
    connection_response_error = S3ResponseError
    file_class = S3BotoStorageFile
    key_class = S3Key

    # used for looking up the access and secret key from env vars
    access_key_names = ['AWS_S3_ACCESS_KEY_ID', 'AWS_ACCESS_KEY_ID']
    secret_key_names = ['AWS_S3_SECRET_ACCESS_KEY', 'AWS_SECRET_ACCESS_KEY']

    # All of the following come from Django settings, with the shown defaults.
    access_key = setting('AWS_S3_ACCESS_KEY_ID', setting('AWS_ACCESS_KEY_ID'))
    secret_key = setting('AWS_S3_SECRET_ACCESS_KEY', setting('AWS_SECRET_ACCESS_KEY'))
    file_overwrite = setting('AWS_S3_FILE_OVERWRITE', True)
    headers = setting('AWS_HEADERS', {})
    bucket_name = setting('AWS_STORAGE_BUCKET_NAME')
    auto_create_bucket = setting('AWS_AUTO_CREATE_BUCKET', False)
    default_acl = setting('AWS_DEFAULT_ACL', 'public-read')
    bucket_acl = setting('AWS_BUCKET_ACL', default_acl)
    querystring_auth = setting('AWS_QUERYSTRING_AUTH', True)
    querystring_expire = setting('AWS_QUERYSTRING_EXPIRE', 3600)
    reduced_redundancy = setting('AWS_REDUCED_REDUNDANCY', False)
    location = setting('AWS_LOCATION', '')
    encryption = setting('AWS_S3_ENCRYPTION', False)
    custom_domain = setting('AWS_S3_CUSTOM_DOMAIN')
    calling_format = setting('AWS_S3_CALLING_FORMAT', SubdomainCallingFormat())
    secure_urls = setting('AWS_S3_SECURE_URLS', True)
    file_name_charset = setting('AWS_S3_FILE_NAME_CHARSET', 'utf-8')
    gzip = setting('AWS_IS_GZIPPED', False)
    preload_metadata = setting('AWS_PRELOAD_METADATA', False)
    gzip_content_types = setting('GZIP_CONTENT_TYPES', (
        'text/css',
        'application/javascript',
        'application/x-javascript',
    ))
    url_protocol = setting('AWS_S3_URL_PROTOCOL', 'http:')
    host = setting('AWS_S3_HOST', S3Connection.DefaultHost)
    use_ssl = setting('AWS_S3_USE_SSL', True)
    port = setting('AWS_S3_PORT', None)
    # The max amount of memory a returned file can take up before being
    # rolled over into a temporary file on disk. Default is 0: Do not roll over.
    max_memory_size = setting('AWS_S3_MAX_MEMORY_SIZE', 0)

    def __init__(self, acl=None, bucket=None, **settings):
        """Override any class-level setting with keyword arguments."""
        # check if some of the settings we've provided as class attributes
        # need to be overwritten with values passed in here
        for name, value in list(settings.items()):
            if hasattr(self, name):
                setattr(self, name, value)

        # For backward-compatibility of old differing parameter names
        if acl is not None:
            self.default_acl = acl
        if bucket is not None:
            self.bucket_name = bucket

        self.location = (self.location or '').lstrip('/')
        # Backward-compatibility: given the anteriority of the SECURE_URL setting
        # we fall back to https if specified in order to avoid the construction
        # of unsecure urls.
        if self.secure_urls:
            self.url_protocol = 'https:'

        self._entries = {}
        self._bucket = None
        self._connection = None

        if not self.access_key and not self.secret_key:
            self.access_key, self.secret_key = self._get_access_keys()

    @property
    def connection(self):
        # Lazily create the boto connection on first access.
        if self._connection is None:
            self._connection = self.connection_class(
                self.access_key,
                self.secret_key,
                is_secure=self.use_ssl,
                calling_format=self.calling_format,
                host=self.host,
                port=self.port,
            )
        return self._connection

    @property
    def bucket(self):
        """
        Get the current bucket. If there is no current bucket object
        create it.
        """
        if self._bucket is None:
            self._bucket = self._get_or_create_bucket(self.bucket_name)
        return self._bucket

    @property
    def entries(self):
        """
        Get the locally cached files for the bucket.
        """
        if self.preload_metadata and not self._entries:
            self._entries = dict(
                (self._decode_name(entry.key), entry)
                for entry in self.bucket.list(prefix=self.location))
        return self._entries

    def _get_access_keys(self):
        """
        Gets the access keys to use when accessing S3. If none are
        provided to the class in the constructor or in the settings then
        get them from the environment variables.
        """
        def lookup_env(names):
            # Return the first non-empty environment variable among `names`.
            for name in names:
                value = os.environ.get(name)
                if value:
                    return value
        access_key = self.access_key or lookup_env(self.access_key_names)
        secret_key = self.secret_key or lookup_env(self.secret_key_names)
        return access_key, secret_key

    def _get_or_create_bucket(self, name):
        """
        Retrieves a bucket if it exists, otherwise creates it.
        """
        try:
            return self.connection.get_bucket(
                name, validate=self.auto_create_bucket)
        except self.connection_response_error:
            if self.auto_create_bucket:
                bucket = self.connection.create_bucket(name)
                bucket.set_acl(self.bucket_acl)
                return bucket
            raise ImproperlyConfigured("Bucket %s does not exist. Buckets "
                                       "can be automatically created by "
                                       "setting AWS_AUTO_CREATE_BUCKET to "
                                       "``True``." % name)

    def _clean_name(self, name):
        """
        Cleans the name so that Windows style paths work
        """
        # Normalize Windows style paths
        clean_name = posixpath.normpath(name).replace('\\', '/')

        # os.path.normpath() can strip trailing slashes so we implement
        # a workaround here.
        if name.endswith('/') and not clean_name.endswith('/'):
            # Add a trailing slash as it was stripped.
            return clean_name + '/'
        else:
            return clean_name

    def _normalize_name(self, name):
        """
        Normalizes the name so that paths like /path/to/ignored/../something.txt
        work. We check to make sure that the path pointed to is not outside
        the directory specified by the LOCATION setting.
        """
        try:
            return safe_join(self.location, name)
        except ValueError:
            raise SuspiciousOperation("Attempted access to '%s' denied."
                                      % name)

    def _encode_name(self, name):
        # boto expects byte keys; encode using the configured charset.
        return smart_bytes(name, encoding=self.file_name_charset)

    def _decode_name(self, name):
        return force_text(name, encoding=self.file_name_charset)

    def _compress_content(self, content):
        """Gzip a given string content."""
        zbuf = BytesIO()
        zfile = GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
        try:
            zfile.write(content.read())
        finally:
            zfile.close()
        zbuf.seek(0)
        content.file = zbuf
        content.seek(0)
        return content

    def _open(self, name, mode='rb'):
        # Returns a lazy file wrapper; raises IOError for missing keys.
        name = self._normalize_name(self._clean_name(name))
        f = self.file_class(name, mode, self)
        if not f.key:
            raise IOError('File does not exist: %s' % name)
        return f

    def _save(self, name, content):
        # Upload `content` under `name`, optionally gzipping it first.
        cleaned_name = self._clean_name(name)
        name = self._normalize_name(cleaned_name)
        headers = self.headers.copy()
        content_type = getattr(
            content, 'content_type',
            mimetypes.guess_type(name)[0] or self.key_class.DefaultContentType)

        # setting the content_type in the key object is not enough.
        headers.update({'Content-Type': content_type})
        headers.update({'Cache-Control': 'max-age %d' % (3600 * 24 * 365 * 2)})

        if self.gzip and content_type in self.gzip_content_types:
            content = self._compress_content(content)
            headers.update({'Content-Encoding': 'gzip'})

        content.name = cleaned_name
        encoded_name = self._encode_name(name)
        key = self.bucket.get_key(encoded_name)
        if not key:
            key = self.bucket.new_key(encoded_name)
        if self.preload_metadata:
            self._entries[encoded_name] = key

        key.set_metadata('Content-Type', content_type)
        key.set_metadata('Cache-Control', 'max-age %d' % (3600 * 24 * 365 * 2))
        self._save_content(key, content, headers=headers)
        return cleaned_name

    def _save_content(self, key, content, headers):
        # only pass backwards incompatible arguments if they vary from the default
        kwargs = {}
        if self.encryption:
            kwargs['encrypt_key'] = self.encryption
        key.set_contents_from_file(content, headers=headers,
                                   policy=self.default_acl,
                                   reduced_redundancy=self.reduced_redundancy,
                                   rewind=True, **kwargs)

    def delete(self, name):
        name = self._normalize_name(self._clean_name(name))
        self.bucket.delete_key(self._encode_name(name))

    def exists(self, name):
        # Prefer the preloaded metadata cache; fall back to a HEAD request.
        name = self._normalize_name(self._clean_name(name))
        if self.entries:
            return name in self.entries
        k = self.bucket.new_key(self._encode_name(name))
        return k.exists()

    def listdir(self, name):
        # Emulate a directory listing on top of the flat key namespace.
        name = self._normalize_name(self._clean_name(name))
        # for the bucket.list and logic below name needs to end in /
        # But for the root path "" we leave it as an empty string
        if name and not name.endswith('/'):
            name += '/'

        dirlist = self.bucket.list(self._encode_name(name))
        files = []
        dirs = set()
        base_parts = name.split("/")[:-1]
        for item in dirlist:
            parts = item.name.split("/")
            parts = parts[len(base_parts):]
            if len(parts) == 1:
                # File
                files.append(parts[0])
            elif len(parts) > 1:
                # Directory
                dirs.add(parts[0])
        return list(dirs), files

    def size(self, name):
        name = self._normalize_name(self._clean_name(name))
        if self.entries:
            entry = self.entries.get(name)
            if entry:
                return entry.size
            return 0
        return self.bucket.get_key(self._encode_name(name)).size

    def modified_time(self, name):
        name = self._normalize_name(self._clean_name(name))
        entry = self.entries.get(name)
        # only call self.bucket.get_key() if the key is not found
        # in the preloaded metadata.
        if entry is None:
            entry = self.bucket.get_key(self._encode_name(name))
        # Parse the last_modified string to a local datetime object.
        return parse_ts_extended(entry.last_modified)

    def url(self, name, headers=None, response_headers=None):
        # Preserve the trailing slash after normalizing the path.
        name = self._normalize_name(self._clean_name(name))
        if self.custom_domain:
            return "%s//%s/%s" % (self.url_protocol,
                                  self.custom_domain,
                                  filepath_to_uri(name))
        return self.connection.generate_url(
            self.querystring_expire,
            method='GET',
            bucket=self.bucket.name,
            key=self._encode_name(name),
            headers=headers,
            query_auth=self.querystring_auth,
            force_http=not self.secure_urls,
            response_headers=response_headers)

    def get_available_name(self, name):
        """ Overwrite existing file with the same name. """
        if self.file_overwrite:
            name = self._clean_name(name)
            return name
        return super(S3BotoStorage, self).get_available_name(name)
"See http://code.google.com/p/boto/") ACCESS_KEY_NAME = getattr(settings, 'AWS_ACCESS_KEY_ID', None) SECRET_KEY_NAME = getattr(settings, 'AWS_SECRET_ACCESS_KEY', None) HEADERS = getattr(settings, 'AWS_HEADERS', {}) STORAGE_BUCKET_NAME = getattr(settings, 'AWS_STORAGE_BUCKET_NAME', None) AUTO_CREATE_BUCKET = getattr(settings, 'AWS_AUTO_CREATE_BUCKET', False) DEFAULT_ACL = getattr(settings, 'AWS_DEFAULT_ACL', 'public-read') BUCKET_ACL = getattr(settings, 'AWS_BUCKET_ACL', DEFAULT_ACL) QUERYSTRING_AUTH = getattr(settings, 'AWS_QUERYSTRING_AUTH', True) QUERYSTRING_EXPIRE = getattr(settings, 'AWS_QUERYSTRING_EXPIRE', 3600) REDUCED_REDUNDANCY = getattr(settings, 'AWS_REDUCED_REDUNDANCY', False) LOCATION = getattr(settings, 'AWS_LOCATION', '') CUSTOM_DOMAIN = getattr(settings, 'AWS_S3_CUSTOM_DOMAIN', None) CALLING_FORMAT = getattr(settings, 'AWS_S3_CALLING_FORMAT', SubdomainCallingFormat()) SECURE_URLS = getattr(settings, 'AWS_S3_SECURE_URLS', True) FILE_NAME_CHARSET = getattr(settings, 'AWS_S3_FILE_NAME_CHARSET', 'utf-8') FILE_OVERWRITE = getattr(settings, 'AWS_S3_FILE_OVERWRITE', True) FILE_BUFFER_SIZE = getattr(settings, 'AWS_S3_FILE_BUFFER_SIZE', 5242880) IS_GZIPPED = getattr(settings, 'AWS_IS_GZIPPED', False) PRELOAD_METADATA = getattr(settings, 'AWS_PRELOAD_METADATA', False) GZIP_CONTENT_TYPES = getattr(settings, 'GZIP_CONTENT_TYPES', ( 'text/css', 'application/javascript', 'application/x-javascript', )) if IS_GZIPPED: from gzip import GzipFile
'raven.contrib.django.raven_compat', ) # Security ALLOWED_HOSTS = ('{{ cookiecutter.repo_name }}.herokuapp.com', ) # Storage # use Amazon S3 for storage for uploaded media files and static files DEFAULT_FILE_STORAGE = 'libs.s3_storages.MediaRootS3BotoStorage' STATICFILES_STORAGE = 'libs.s3_storages.StaticRootS3BotoStorage' # Amazon S3 # See: http://django-storages.readthedocs.org/en/latest/backends/amazon-S3.html#settings AWS_S3_CALLING_FORMAT = SubdomainCallingFormat() AWS_ACCESS_KEY_ID = environ.get("AWS_S3_ACCESS_KEY_ID", "") AWS_SECRET_ACCESS_KEY = environ.get("AWS_S3_SECRET_ACCESS_KEY", "") AWS_STORAGE_BUCKET_NAME = environ.get("AWS_STORAGE_BUCKET_NAME", "") AWS_HOST = environ.get("AWS_S3_HOST", "s3.amazonaws.com") AWS_AUTO_CREATE_BUCKET = False AWS_S3_FILE_OVERWRITE = False AWS_QUERYSTRING_AUTH = True AWS_PRELOAD_METADATA = True AWS_REDUCED_REDUNDANCY = False # AWS cache settings, don't change unless you know what you're doing AWS_IS_GZIPPED = False AWS_EXPIREY = 60 * 60 * 24 * 7 AWS_HEADERS = {
def __init__(self):
    """Connect to S3 over plain HTTP and look up the configured bucket.

    NOTE(review): is_secure=False sends traffic unencrypted — confirm
    this is intended for the deployment environment.
    """
    self.conn = boto.connect_s3(is_secure=False,
                                calling_format=SubdomainCallingFormat())
    self.bucket = self.conn.get_bucket(BUCKET_NAME)
class AsyncS3Connection(AWSAuthConnection):
    """Sub-class that adds support for asynchronous S3 access. Callers provide
    their Amazon AWS access key and secret key when an instance of the class is
    created. Then, callers can repeatedly call 'make_request' in order to make
    asynchronous HTTP calls against the S3 service. Using this API rather than
    the standard boto API avoids blocking the calling thread until the
    operation is complete.
    """
    DefaultHost = 's3.amazonaws.com'
    """By default, connect to this S3 endpoint."""

    DefaultCallingFormat = SubdomainCallingFormat()
    """By default, use the S3 sub-domain format for providing bucket name."""

    def __init__(self, host=DefaultHost, aws_access_key_id=None,
                 aws_secret_access_key=None, retry_policy=S3RetryPolicy()):
        # NOTE(review): the retry_policy default is one shared instance
        # (evaluated at class-definition time) — confirm S3RetryPolicy is
        # stateless before relying on it across connections.
        AWSAuthConnection.__init__(self, host, aws_access_key_id,
                                   aws_secret_access_key)
        self.retry_policy = retry_policy

    def make_request(self, method, bucket='', key='', headers=None,
                     params=None, body=None, request_timeout=20.0,
                     callback=None):
        """Start an asynchronous HTTP operation against the S3 service. When
        the operation is complete, the 'callback' function will be invoked,
        with the HTTP response object as its only parameter. If a failure
        occurs during execution of the operation, it may be retried, according
        to the retry policy with which this instance was initialized.
        """
        CallWithRetryAsync(self.retry_policy, self._make_request, method,
                           bucket, key, headers, params, body,
                           request_timeout, callback=callback)

    def _make_request(self, method, bucket, key, headers, params, body,
                      request_timeout, callback):
        """Wrapped by CallWithRetryAsync in order to support retry."""
        # Build the boto HTTP request in order to create the authorization header.
        path = AsyncS3Connection.DefaultCallingFormat.build_path_base(
            bucket, key)
        auth_path = AsyncS3Connection.DefaultCallingFormat.build_auth_path(
            bucket, key)
        host = AsyncS3Connection.DefaultCallingFormat.build_host(
            self.server_name(), bucket)

        # Only support byte strings for now.
        assert not body or type(
            body) is str, "Only support byte strings (type=%s)." % type(body)

        boto_request = self.build_base_http_request(method, path, auth_path,
                                                    {}, headers, body or '',
                                                    host)
        # Signs the request (adds the Authorization header) using this
        # connection's credentials.
        boto_request.authorize(connection=self)

        # Log request for debugging (body truncated to 256 bytes).
        debug_body = boto_request.body[:256].decode(
            errors='ignore') if boto_request.body else None
        logging.debug('%s "%s://%s%s" headers: %s body: %s',
                      boto_request.method, self.protocol, boto_request.host,
                      boto_request.path, boto_request.headers, debug_body)

        request_url = '%s://%s%s' % (self.protocol, host, path)
        if params:
            # NOTE(review): urllib.urlencode is Python 2 only; on Python 3
            # this is urllib.parse.urlencode — confirm the targeted runtime.
            request_url += '?' + urllib.urlencode(params)

        # Build the tornado http client request (different version of
        # HTTPRequest class).
        tornado_request = HTTPRequest(request_url, method=method,
                                      headers=boto_request.headers,
                                      body=body,
                                      request_timeout=request_timeout)

        # Start the asynchronous request. When it's complete, invoke
        # 'callback', passing the HTTP response object.
        http_client = AsyncHTTPClient()
        http_client.fetch(tornado_request, callback=callback)

    def _required_auth_capability(self):
        """Called by AWSAuthConnection.__init__ in order to determine which
        auth handler to construct. In this case, S3 HMAC signing should be
        used.
        """
        return ['s3']
def get_connection(scheme, parsed_url, storage_uri):
    """Return a configured boto connection for the duplicity S3 backend.

    :param scheme: URL scheme of the backend (expected 's3' when an
        explicit hostname is given).
    :param parsed_url: the parsed backend URL; its hostname (if any)
        overrides the default S3 endpoint.
    :param storage_uri: boto storage URI used to create the connection.
    :raises: log.FatalError on missing/old boto or calling-format problems;
        BackendException when boto would receive a null bucket name.
    """
    try:
        from boto.s3.connection import S3Connection
        assert hasattr(S3Connection, 'lookup')

        # Newer versions of boto default to using
        # virtual hosting for buckets as a result of
        # upstream deprecation of the old-style access
        # method by Amazon S3. This change is not
        # backwards compatible (in particular with
        # respect to upper case characters in bucket
        # names); so we default to forcing use of the
        # old-style method unless the user has
        # explicitly asked us to use new-style bucket
        # access.
        #
        # Note that if the user wants to use new-style
        # buckets, we use the subdomain calling form
        # rather than given the option of both
        # subdomain and vhost. The reason being that
        # anything addressable as a vhost, is also
        # addressable as a subdomain. Seeing as the
        # latter is mostly a convenience method of
        # allowing browse:able content semi-invisibly
        # being hosted on S3, the former format makes
        # a lot more sense for us to use - being
        # explicit about what is happening (the fact
        # that we are talking to S3 servers).
        try:
            from boto.s3.connection import OrdinaryCallingFormat
            from boto.s3.connection import SubdomainCallingFormat
            cfs_supported = True
            calling_format = OrdinaryCallingFormat()
        except ImportError:
            cfs_supported = False
            calling_format = None

        if globals.s3_use_new_style:
            if cfs_supported:
                calling_format = SubdomainCallingFormat()
            else:
                # BUGFIX: the first two concatenated string fragments were
                # missing a separating space ("wasrequested").
                log.FatalError("Use of new-style (subdomain) S3 bucket addressing was "
                               "requested, but does not seem to be supported by the "
                               "boto library. Either you need to upgrade your boto "
                               "library or duplicity has failed to correctly detect "
                               "the appropriate support.",
                               log.ErrorCode.boto_old_style)
        else:
            if cfs_supported:
                calling_format = OrdinaryCallingFormat()
            else:
                calling_format = None

    except ImportError:
        log.FatalError("This backend (s3) requires boto library, version %s or later, "
                       "(http://code.google.com/p/boto/)." % BOTO_MIN_VERSION,
                       log.ErrorCode.boto_lib_too_old)

    if not parsed_url.hostname:
        # Use the default host.
        conn = storage_uri.connect(is_secure=(not globals.s3_unencrypted_connection))
    else:
        assert scheme == 's3'
        conn = storage_uri.connect(host=parsed_url.hostname,
                                   is_secure=(not globals.s3_unencrypted_connection))

    if hasattr(conn, 'calling_format'):
        if calling_format is None:
            log.FatalError("It seems we previously failed to detect support for calling "
                           "formats in the boto library, yet the support is there. This is "
                           "almost certainly a duplicity bug.",
                           log.ErrorCode.boto_calling_format)
        else:
            conn.calling_format = calling_format
    else:
        # Duplicity hangs if boto gets a null bucket name.
        # HC: Caught a socket error, trying to recover
        raise BackendException('Boto requires a bucket name.')
    return conn
# Lock used only to serialize the per-key progress printing below; the
# S3 delete calls themselves are made outside the lock so workers can
# delete in parallel.
l = Lock()


def deletion_task():
    # Worker: each thread opens its own S3 connection/bucket handle
    # (boto connections are not shared between threads) and drains the
    # shared queue `q` forever. NOTE(review): `q`, `bucket_name` and
    # `THREADS` are defined elsewhere in this file — presumably a
    # Queue.Queue, the target bucket name and a worker count; confirm.
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    while True:
        k = q.get()
        # Guard the print so concurrent workers don't interleave output.
        l.acquire()
        print k
        l.release()
        bucket.delete_key(k)
        q.task_done()

# Start the daemon worker pool; daemon threads let the process exit
# once the main thread finishes (q.join below ensures work completes).
for i in range(THREADS):
    t = Thread(target=deletion_task)
    t.setDaemon(True)
    t.start()

# Producer: list every key in the bucket and enqueue it for deletion.
# is_secure=False uses plain HTTP for the (read-only) listing pass.
conn = boto.connect_s3(is_secure=False, calling_format=SubdomainCallingFormat())
bucket = conn.get_bucket(bucket_name)
for k in bucket:
    q.put(k.key)
# Block until every queued key has been processed by a worker.
q.join()
time.sleep(0.5)
print "Finished"
class S3BotoStorage(Storage):
    """
    Amazon Simple Storage Service using Boto

    This storage backend supports opening files in read or write
    mode and supports streaming(buffering) data in chunks to S3
    when writing.
    """
    # Pluggable classes so subclasses can swap the boto connection,
    # error, file-wrapper and key implementations.
    connection_class = S3Connection
    connection_response_error = S3ResponseError
    file_class = S3BotoStorageFile
    key_class = S3Key

    # used for looking up the access and secret key from env vars
    access_key_names = ['AWS_S3_ACCESS_KEY_ID', 'AWS_ACCESS_KEY_ID']
    secret_key_names = ['AWS_S3_SECRET_ACCESS_KEY', 'AWS_SECRET_ACCESS_KEY']
    security_token_names = ['AWS_SESSION_TOKEN', 'AWS_SECURITY_TOKEN']
    security_token = None

    # All of the below are resolved from Django settings at class-definition
    # time via setting(); instances may override any of them through the
    # **settings kwargs handled in __init__.
    access_key = setting('AWS_S3_ACCESS_KEY_ID', setting('AWS_ACCESS_KEY_ID'))
    secret_key = setting('AWS_S3_SECRET_ACCESS_KEY', setting('AWS_SECRET_ACCESS_KEY'))
    file_overwrite = setting('AWS_S3_FILE_OVERWRITE', True)
    headers = setting('AWS_HEADERS', {})
    bucket_name = setting('AWS_STORAGE_BUCKET_NAME')
    auto_create_bucket = setting('AWS_AUTO_CREATE_BUCKET', False)
    default_acl = setting('AWS_DEFAULT_ACL', 'public-read')
    bucket_acl = setting('AWS_BUCKET_ACL', default_acl)
    querystring_auth = setting('AWS_QUERYSTRING_AUTH', True)
    querystring_expire = setting('AWS_QUERYSTRING_EXPIRE', 3600)
    reduced_redundancy = setting('AWS_REDUCED_REDUNDANCY', False)
    location = setting('AWS_LOCATION', '')
    origin = setting('AWS_ORIGIN', Location.DEFAULT)
    encryption = setting('AWS_S3_ENCRYPTION', False)
    custom_domain = setting('AWS_S3_CUSTOM_DOMAIN')
    calling_format = setting('AWS_S3_CALLING_FORMAT', SubdomainCallingFormat())
    secure_urls = setting('AWS_S3_SECURE_URLS', True)
    file_name_charset = setting('AWS_S3_FILE_NAME_CHARSET', 'utf-8')
    gzip = setting('AWS_IS_GZIPPED', False)
    preload_metadata = setting('AWS_PRELOAD_METADATA', False)
    gzip_content_types = setting('GZIP_CONTENT_TYPES', (
        'text/css',
        'text/javascript',
        'application/javascript',
        'application/x-javascript',
        'image/svg+xml',
    ))
    url_protocol = setting('AWS_S3_URL_PROTOCOL', 'http:')
    host = setting('AWS_S3_HOST', S3Connection.DefaultHost)
    use_ssl = setting('AWS_S3_USE_SSL', True)
    port = setting('AWS_S3_PORT')
    proxy = setting('AWS_S3_PROXY_HOST')
    proxy_port = setting('AWS_S3_PROXY_PORT')
    max_memory_size = setting('AWS_S3_MAX_MEMORY_SIZE', 0)

    def __init__(self, acl=None, bucket=None, **settings):
        """Initialize the storage, allowing per-instance setting overrides.

        :param acl: legacy alias for default_acl.
        :param bucket: legacy alias for bucket_name.
        :param settings: any class-attribute setting may be overridden here.
        """
        # check if some of the settings we've provided as class attributes
        # need to be overwritten with values passed in here
        for name, value in settings.items():
            if hasattr(self, name):
                setattr(self, name, value)

        # For backward-compatibility of old differing parameter names
        if acl is not None:
            self.default_acl = acl
        if bucket is not None:
            self.bucket_name = bucket

        check_location(self)

        # Backward-compatibility: given the anteriority of the SECURE_URL setting
        # we fall back to https if specified in order to avoid the construction
        # of unsecure urls.
        if self.secure_urls:
            self.url_protocol = 'https:'

        # Lazily-populated caches: entries/bucket/connection are created on
        # first access (see the corresponding properties below).
        self._entries = {}
        self._bucket = None
        self._connection = None
        self._loaded_meta = False

        self.access_key, self.secret_key = self._get_access_keys()
        self.security_token = self._get_security_token()

    @property
    def connection(self):
        """Lazily create and cache the boto S3 connection."""
        if self._connection is None:
            kwargs = self._get_connection_kwargs()

            self._connection = self.connection_class(
                self.access_key,
                self.secret_key,
                **kwargs
            )
        return self._connection

    def _get_connection_kwargs(self):
        """Keyword arguments for connection_class, built from the settings."""
        return dict(
            security_token=self.security_token,
            is_secure=self.use_ssl,
            calling_format=self.calling_format,
            host=self.host,
            port=self.port,
            proxy=self.proxy,
            proxy_port=self.proxy_port
        )

    @property
    def bucket(self):
        """
        Get the current bucket. If there is no current bucket object
        create it.
        """
        if self._bucket is None:
            self._bucket = self._get_or_create_bucket(self.bucket_name)
        return self._bucket

    @property
    def entries(self):
        """
        Get the locally cached files for the bucket.
        """
        # Only fetch the full listing once, and only when preloading is on.
        if self.preload_metadata and not self._loaded_meta:
            self._entries.update({
                self._decode_name(entry.key): entry
                for entry in self.bucket.list(prefix=self.location)
            })
            self._loaded_meta = True
        return self._entries

    def _get_access_keys(self):
        """
        Gets the access keys to use when accessing S3. If none is
        provided in the settings then get them from the environment
        variables.
        """
        access_key = self.access_key or lookup_env(S3BotoStorage.access_key_names)
        secret_key = self.secret_key or lookup_env(S3BotoStorage.secret_key_names)
        return access_key, secret_key

    def _get_security_token(self):
        """
        Gets the security token to use when accessing S3. Get it from
        the environment variables.
        """
        security_token = self.security_token or lookup_env(S3BotoStorage.security_token_names)
        return security_token

    def _get_or_create_bucket(self, name):
        """
        Retrieves a bucket if it exists, otherwise creates it.
        """
        try:
            # validate=auto_create_bucket: only issue the extra existence
            # request when we would be able to create the bucket anyway.
            return self.connection.get_bucket(name, validate=self.auto_create_bucket)
        except self.connection_response_error:
            if self.auto_create_bucket:
                bucket = self.connection.create_bucket(name, location=self.origin)
                if not hasattr(django_settings, 'AWS_BUCKET_ACL'):
                    warnings.warn(
                        "The default behavior of S3BotoStorage is insecure. By default new buckets "
                        "are saved with an ACL of 'public-read' (globally publicly readable). To change "
                        "to using Amazon's default of the bucket owner set AWS_DEFAULT_ACL = None, "
                        "otherwise to silence this warning explicitly set AWS_DEFAULT_ACL."
                    )
                if self.bucket_acl:
                    bucket.set_acl(self.bucket_acl)
                return bucket
            raise ImproperlyConfigured('Bucket %s does not exist. Buckets '
                                       'can be automatically created by '
                                       'setting AWS_AUTO_CREATE_BUCKET to '
                                       '``True``.' % name)

    def _clean_name(self, name):
        """
        Cleans the name so that Windows style paths work
        """
        return clean_name(name)

    def _normalize_name(self, name):
        """
        Normalizes the name so that paths like /path/to/ignored/../something.txt
        work. We check to make sure that the path pointed to is not outside
        the directory specified by the LOCATION setting.
        """
        try:
            return safe_join(self.location, name)
        except ValueError:
            raise SuspiciousOperation("Attempted access to '%s' denied." % name)

    def _encode_name(self, name):
        # Encode to the configured charset before handing the key to boto.
        return smart_str(name, encoding=self.file_name_charset)

    def _decode_name(self, name):
        # Inverse of _encode_name for keys coming back from boto.
        return force_text(name, encoding=self.file_name_charset)

    def _compress_content(self, content):
        """Gzip a given string content."""
        zbuf = io.BytesIO()
        # The GZIP header has a modification time attribute (see http://www.zlib.org/rfc-gzip.html)
        # This means each time a file is compressed it changes even if the other contents don't change
        # For S3 this defeats detection of changes using MD5 sums on gzipped files
        # Fixing the mtime at 0.0 at compression time avoids this problem
        zfile = GzipFile(mode='wb', fileobj=zbuf, mtime=0.0)
        try:
            zfile.write(force_bytes(content.read()))
        finally:
            zfile.close()
        zbuf.seek(0)
        content.file = zbuf
        content.seek(0)
        return content

    def _open(self, name, mode='rb'):
        """Open the named file from S3; raises IOError if the key is missing."""
        name = self._normalize_name(self._clean_name(name))
        f = self.file_class(name, mode, self)
        if not f.key:
            raise IOError('File does not exist: %s' % name)
        return f

    def _save(self, name, content):
        """Upload `content` under `name`, applying gzip/headers as configured."""
        cleaned_name = self._clean_name(name)
        name = self._normalize_name(cleaned_name)
        headers = self.headers.copy()
        _type, encoding = mimetypes.guess_type(name)
        content_type = getattr(content, 'content_type', None)
        content_type = content_type or _type or self.key_class.DefaultContentType

        # setting the content_type in the key object is not enough.
        headers.update({'Content-Type': content_type})

        if self.gzip and content_type in self.gzip_content_types:
            content = self._compress_content(content)
            headers.update({'Content-Encoding': 'gzip'})
        elif encoding:
            # If the content already has a particular encoding, set it
            headers.update({'Content-Encoding': encoding})

        content.name = cleaned_name
        encoded_name = self._encode_name(name)
        key = self.bucket.get_key(encoded_name)
        if not key:
            key = self.bucket.new_key(encoded_name)
        if self.preload_metadata:
            self._entries[encoded_name] = key
            key.last_modified = datetime.utcnow().strftime(ISO8601)

        key.set_metadata('Content-Type', content_type)
        self._save_content(key, content, headers=headers)
        return cleaned_name

    def _save_content(self, key, content, headers):
        """Write the file data to the boto key with the configured policy."""
        # only pass backwards incompatible arguments if they vary from the default
        kwargs = {}
        if self.encryption:
            kwargs['encrypt_key'] = self.encryption
        key.set_contents_from_file(content, headers=headers,
                                   policy=self.default_acl,
                                   reduced_redundancy=self.reduced_redundancy,
                                   rewind=True, **kwargs)

    def _get_key(self, name):
        """Return the boto key for `name`, using the preload cache if present."""
        name = self._normalize_name(self._clean_name(name))
        if self.entries:
            return self.entries.get(name)
        return self.bucket.get_key(self._encode_name(name))

    def delete(self, name):
        """Delete the named key from the bucket."""
        name = self._normalize_name(self._clean_name(name))
        self.bucket.delete_key(self._encode_name(name))

    def exists(self, name):
        """Return True if `name` exists; an empty name tests bucket access."""
        if not name:
            # root element aka the bucket
            try:
                self.bucket
                return True
            except ImproperlyConfigured:
                return False
        return self._get_key(name) is not None

    def listdir(self, name):
        """Return (directories, files) directly under `name` in the bucket."""
        name = self._normalize_name(self._clean_name(name))
        # for the bucket.list and logic below name needs to end in /
        # But for the root path "" we leave it as an empty string
        if name and not name.endswith('/'):
            name += '/'

        dirlist = self.bucket.list(self._encode_name(name))
        files = []
        dirs = set()
        base_parts = name.split('/')[:-1]
        for item in dirlist:
            parts = item.name.split('/')
            parts = parts[len(base_parts):]
            if len(parts) == 1:
                # File
                files.append(parts[0])
            elif len(parts) > 1:
                # Directory
                dirs.add(parts[0])
        return list(dirs), files

    def size(self, name):
        """Return the stored size (bytes) of the named key."""
        return self._get_key(name).size

    def get_modified_time(self, name):
        """Return last-modified as aware UTC, or naive when USE_TZ is off."""
        dt = tz.make_aware(parse_ts(self._get_key(name).last_modified), tz.utc)
        return dt if setting('USE_TZ') else tz.make_naive(dt)

    def modified_time(self, name):
        # Legacy Django API: always returns a naive datetime.
        dt = tz.make_aware(parse_ts(self._get_key(name).last_modified), tz.utc)
        return tz.make_naive(dt)

    def url(self, name, headers=None, response_headers=None, expire=None):
        """Return a URL for `name`: custom-domain if set, else a signed S3 URL."""
        # Preserve the trailing slash after normalizing the path.
        name = self._normalize_name(self._clean_name(name))
        if self.custom_domain:
            return '{}//{}/{}'.format(self.url_protocol,
                                      self.custom_domain, filepath_to_uri(name))

        if expire is None:
            expire = self.querystring_expire

        return self.connection.generate_url(
            expire,
            method='GET',
            bucket=self.bucket.name,
            key=self._encode_name(name),
            headers=headers,
            query_auth=self.querystring_auth,
            force_http=not self.secure_urls,
            response_headers=response_headers,
        )

    def get_available_name(self, name, max_length=None):
        """ Overwrite existing file with the same name. """
        name = self._clean_name(name)
        if self.file_overwrite:
            return get_available_overwrite_name(name, max_length)
        return super(S3BotoStorage, self).get_available_name(name, max_length)
def __init__(self, folder_name=None, access_key=None, secret_key=None,
             bucket_acl=None, acl=None, headers=None, gzip=None,
             gzip_content_types=None, querystring_auth=None,
             querystring_expire=None, reduced_redundancy=None,
             custom_domain=None, secure_urls=None, location=None,
             file_name_charset=None, preload_metadata=None,
             calling_format=None, file_overwrite=None,
             auto_create_bucket=None):
    """Initialize the storage from explicit arguments or Flask app config.

    Every parameter defaults to None; a None value falls back to the
    corresponding ``current_app.config`` entry (with a built-in default).

    BUGFIX: the original resolution used ``value or config.get(...)``,
    which silently discarded explicit falsy arguments — e.g. passing
    ``querystring_auth=False`` or ``headers={}`` fell through to the app
    config. Resolution now tests ``is None`` so explicit falsy values are
    honored.
    """
    def _option(value, config_key, default):
        # Resolve one setting: explicit argument wins, then app config,
        # then the built-in default.
        if value is not None:
            return value
        return current_app.config.get(config_key, default)

    self.access_key = _option(access_key, 'AWS_ACCESS_KEY_ID', None)
    self.secret_key = _option(secret_key, 'AWS_SECRET_ACCESS_KEY', None)
    self.calling_format = _option(calling_format, 'AWS_S3_CALLING_FORMAT',
                                  SubdomainCallingFormat())
    self.auto_create_bucket = _option(auto_create_bucket,
                                      'AWS_AUTO_CREATE_BUCKET', False)
    self.bucket_name = _option(folder_name, 'AWS_STORAGE_BUCKET_NAME', None)
    self.acl = _option(acl, 'AWS_DEFAULT_ACL', 'public-read')
    # Bucket ACL falls back to the (already-resolved) object ACL.
    self.bucket_acl = _option(bucket_acl, 'AWS_BUCKET_ACL', self.acl)
    self.file_overwrite = _option(file_overwrite, 'AWS_S3_FILE_OVERWRITE', False)
    self.headers = _option(headers, 'AWS_HEADERS', {})
    self.preload_metadata = _option(preload_metadata, 'AWS_PRELOAD_METADATA', False)
    self.gzip = _option(gzip, 'AWS_IS_GZIPPED', False)
    self.gzip_content_types = _option(
        gzip_content_types,
        'GZIP_CONTENT_TYPES',
        (
            'text/css',
            'application/javascript',
            'application/x-javascript',
        )
    )
    self.querystring_auth = _option(querystring_auth, 'AWS_QUERYSTRING_AUTH', True)
    self.querystring_expire = _option(querystring_expire, 'AWS_QUERYSTRING_EXPIRE', 3600)
    self.reduced_redundancy = _option(reduced_redundancy, 'AWS_REDUCED_REDUNDANCY', False)
    self.custom_domain = _option(custom_domain, 'AWS_S3_CUSTOM_DOMAIN', None)
    self.secure_urls = _option(secure_urls, 'AWS_S3_SECURE_URLS', True)
    self.location = _option(location, 'AWS_LOCATION', '')
    # Keys are always relative to the bucket root.
    self.location = self.location.lstrip('/')
    self.file_name_charset = _option(file_name_charset,
                                     'AWS_S3_FILE_NAME_CHARSET', 'utf-8')

    # Lazily-created connection and local entry cache.
    self._connection = None
    self._entries = {}