#!/usr/bin/python2.7

"""
   - Author : Nag m
   - Hack   : Delete all objects in a bucket
   - Info   : Delete all objects in a bucket
              * 101-s3-aws
"""

import boto
from boto.s3.connection import OrdinaryCallingFormat


def deleteall(name):
    bucket = conn.get_bucket(name)
    for obj in bucket.list():
        print " Deleting ... ", obj.name
        obj.delete()


if __name__ == "__main__":
    conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    bucketname = "101-s3-aws"
    deleteall(bucketname)
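# For larger buckets, a hedged alternative sketch: boto 2.x also has a
# multi-object delete (bucket.delete_keys), which issues one request per
# batch of up to 1000 keys instead of one request per key. Same bucket
# name assumed as above.
import boto
from boto.s3.connection import OrdinaryCallingFormat

conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
bucket = conn.get_bucket("101-s3-aws")
result = bucket.delete_keys([key.name for key in bucket.list()])
print " Deleted ... ", len(result.deleted), " objects"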
if __name__ == "__main__": if 'S3_ACCESS_KEY_ID' in os.environ: Id = os.environ['S3_ACCESS_KEY_ID'] if 'S3_SECRET_ACCESS_KEY' in os.environ: Key = os.environ['S3_SECRET_ACCESS_KEY'] if 'S3_HOSTNAME' in os.environ: Host, Port = os.environ['S3_HOSTNAME'].split(':') Port = int(Port) s3 = boto.connect_s3(Id, Key, host=Host, port=Port, is_secure=False, calling_format=OrdinaryCallingFormat()) # # Bucket create # if not args.mk is False: if not args.mk: buckets = DEFAULT_BUCKETS else: buckets = args.mk create_buckets(s3, buckets) # # Bucket remove # if not args.rm is False: if not args.rm:
def make_connection(self, user):
    return S3Connection(user['key_id'], user['key_secret'],
                        is_secure=False,
                        host=self.host,
                        port=self.port,
                        debug=False,
                        calling_format=OrdinaryCallingFormat())
def get_connection(scheme, parsed_url, storage_uri):
    try:
        from boto.s3.connection import S3Connection
        assert hasattr(S3Connection, u'lookup')

        # Newer versions of boto default to using virtual hosting for
        # buckets as a result of upstream deprecation of the old-style
        # access method by Amazon S3. This change is not backwards
        # compatible (in particular with respect to upper case
        # characters in bucket names); so we default to forcing use of
        # the old-style method unless the user has explicitly asked us
        # to use new-style bucket access.
        #
        # Note that if the user wants to use new-style buckets, we use
        # the subdomain calling form rather than giving the option of
        # both subdomain and vhost. The reason is that anything
        # addressable as a vhost is also addressable as a subdomain.
        # Seeing as the latter is mostly a convenience method of
        # allowing browsable content to be semi-invisibly hosted on S3,
        # the former format makes a lot more sense for us to use -
        # being explicit about what is happening (the fact that we are
        # talking to S3 servers).
        try:
            from boto.s3.connection import OrdinaryCallingFormat
            from boto.s3.connection import SubdomainCallingFormat
            cfs_supported = True
            calling_format = OrdinaryCallingFormat()
        except ImportError:
            cfs_supported = False
            calling_format = None

        if globals.s3_use_new_style:
            if cfs_supported:
                calling_format = SubdomainCallingFormat()
            else:
                log.FatalError(
                    u"Use of new-style (subdomain) S3 bucket addressing was "
                    u"requested, but does not seem to be supported by the "
                    u"boto library. Either you need to upgrade your boto "
                    u"library or duplicity has failed to correctly detect "
                    u"the appropriate support.",
                    log.ErrorCode.boto_old_style)
        else:
            if cfs_supported:
                calling_format = OrdinaryCallingFormat()
            else:
                calling_format = None

    except ImportError:
        log.FatalError(
            u"This backend (s3) requires boto library, version %s or later, "
            u"(http://code.google.com/p/boto/)." % BOTO_MIN_VERSION,
            log.ErrorCode.boto_lib_too_old)

    if not parsed_url.hostname:
        # Use the default host.
        conn = storage_uri.connect(
            is_secure=(not globals.s3_unencrypted_connection))
    else:
        assert scheme == u's3'
        conn = storage_uri.connect(
            host=parsed_url.hostname, port=parsed_url.port,
            is_secure=(not globals.s3_unencrypted_connection))

    if hasattr(conn, u'calling_format'):
        if calling_format is None:
            log.FatalError(
                u"It seems we previously failed to detect support for calling "
                u"formats in the boto library, yet the support is there. This is "
                u"almost certainly a duplicity bug.",
                log.ErrorCode.boto_calling_format)
        else:
            conn.calling_format = calling_format
    else:
        # Duplicity hangs if boto gets a null bucket name.
        # HC: Caught a socket error, trying to recover
        raise BackendException(u'Boto requires a bucket name.')

    return conn
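# The practical difference between the two calling formats is just how the
# request host is built. A small illustration using boto 2.x's build_host
# (bucket names below are examples only):
from boto.s3.connection import OrdinaryCallingFormat, SubdomainCallingFormat

# Path-style: the bucket stays in the URL path, so the certificate for
# s3.amazonaws.com always matches, even for a name like "My.Bucket".
assert OrdinaryCallingFormat().build_host(
    's3.amazonaws.com', 'My.Bucket') == 's3.amazonaws.com'

# Virtual-host style: the bucket becomes part of the hostname, which breaks
# for names containing dots (certificate mismatch) or upper case characters.
assert SubdomainCallingFormat().build_host(
    's3.amazonaws.com', 'my-bucket') == 'my-bucket.s3.amazonaws.com'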
def s3_copy(aws_access_key_id, aws_secret_access_key, bucket, src, dst):
    try:
        conn = boto.connect_s3(aws_access_key_id, aws_secret_access_key,
                               is_secure=False,
                               calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket(bucket, validate=True)
        bucket.copy_key(dst, bucket.name, src)
        return True
    except Exception as e:
        info(e)
        return False
def _connect(access_id, access_key, host=None, port=None, is_secure=False):
    return boto.connect_s3(
        access_id, access_key,
        host=host, port=port,
        is_secure=is_secure,
        calling_format=OrdinaryCallingFormat()
    )
from boto.s3.connection import S3Connection
from boto.s3.connection import OrdinaryCallingFormat


def sizeof_fmt(num):
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if num < 1024.0:
            return "%3.1f %s" % (num, x)
        num /= 1024.0


conn = S3Connection('s3key', 's3secretkey',
                    calling_format=OrdinaryCallingFormat())
print conn

total_bytes = 0
buckets = conn.get_all_buckets()
for key in buckets:
    print key.name
    try:
        bucket = conn.get_bucket(key.name)
    except:
        print "s3 exception"
        continue  # skip buckets we cannot read
    bucket_size = 0
    for key1 in bucket:
        bucket_size += key1.size
        total_bytes += key1.size
    print key.name
    print sizeof_fmt(bucket_size)

print "total bucket size %s" % sizeof_fmt(total_bytes)
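# A quick sanity check of the sizeof_fmt helper above (values verified by
# hand: 123456789 / 1024 / 1024 is roughly 117.7):
print sizeof_fmt(512)        # -> "512.0 bytes"
print sizeof_fmt(123456789)  # -> "117.7 MB"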
INSTALLED_APPS += ('gunicorn', )

# STORAGE CONFIGURATION
# ------------------------------------------------------------------------------
# Uploaded Media Files
# ------------------------
# See: http://django-storages.readthedocs.io/en/latest/index.html
INSTALLED_APPS += ('storages', )

AWS_ACCESS_KEY_ID = env('DJANGO_AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = env('DJANGO_AWS_SECRET_ACCESS_KEY')
AWS_STORAGE_BUCKET_NAME = env('DJANGO_AWS_STORAGE_BUCKET_NAME')
AWS_AUTO_CREATE_BUCKET = True
AWS_QUERYSTRING_AUTH = False
AWS_S3_CALLING_FORMAT = OrdinaryCallingFormat()

# AWS cache settings, don't change unless you know what you're doing:
AWS_EXPIRY = 60 * 60 * 24 * 7

# TODO See: https://github.com/jschneier/django-storages/issues/47
# Revert the following and use str after the above-mentioned bug is fixed in
# either django-storage-redux or boto
AWS_HEADERS = {
    'Cache-Control': six.b('max-age=%d, s-maxage=%d, must-revalidate' % (
        AWS_EXPIRY, AWS_EXPIRY))
}

# URL that handles the media served from MEDIA_ROOT, used for managing
# stored files.
def main(argv=None):
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    parser.add_argument('path', nargs=1, help="root path")
    parser.add_argument('outdir', nargs=1, )
    parser.add_argument('--no-s3-check', dest='s3_check', action='store_false')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # look up all the files in S3, so we can double check that all
    # the files exist as we loop through Nuxeo
    file_check = None
    s3_bytes = s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region('us-west-2',
                                    calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
        for count, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if count % 50000 == 0:
                print('{0} s3 files memorized'.format(count), file=sys.stderr)
            s3_bytes = s3_bytes + key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    campuses = nx.children(argv.path[0])

    summary_workbook = xlsxwriter.Workbook(
        os.path.join(argv.outdir[0], '{}-summary.xlsx'.format(today)))

    # cell formats
    header_format = summary_workbook.add_format({'bold': True, })
    number_format = summary_workbook.add_format()
    number_format.set_num_format('#,##0')

    summary_worksheet = summary_workbook.add_worksheet('summary')
    # headers
    summary_worksheet.write(0, 1, 'deduplicated files', header_format)
    summary_worksheet.write(0, 2, 'deduplicated bytes', header_format)
    summary_worksheet.write(0, 4, 'total files', header_format)
    summary_worksheet.write(0, 5, 'total bytes', header_format)
    if argv.s3_check:
        summary_worksheet.write(0, 7, 'files on S3', header_format)
        summary_worksheet.write(0, 8, 'bytes on S3', header_format)
    # widths
    summary_worksheet.set_column(0, 1, 10, )
    summary_worksheet.set_column(2, 2, 25, )
    summary_worksheet.set_column(3, 4, 10, )
    summary_worksheet.set_column(5, 5, 25, )
    summary_worksheet.set_column(6, 7, 10, )
    summary_worksheet.set_column(8, 8, 25, )
    summary_worksheet.set_column(9, 9, 10, )

    true_count = dedup_total = total_count = running_total = 0
    row = 1
    for campus in campuses:
        basename = os.path.basename(campus['path'])
        documents = nx.nxql(
            'select * from Document where ecm:path startswith"{0}"'.format(
                campus['path']))
        (this_count, this_total,
         dedup_count, dedup_bytes) = forCampus(documents, basename,
                                               file_check, argv.outdir[0])
        summary_worksheet.write(row, 0, basename)
        summary_worksheet.write(row, 1, dedup_count, number_format)
        summary_worksheet.write(row, 2, dedup_bytes, number_format)
        summary_worksheet.write(row, 3, sizeof_fmt(dedup_bytes))
        summary_worksheet.write(row, 4, this_count, number_format)
        summary_worksheet.write(row, 5, this_total, number_format)
        summary_worksheet.write(row, 6, sizeof_fmt(this_total))
        total_count = total_count + this_count      # number of files
        running_total = running_total + this_total  # number of bytes
        true_count = true_count + dedup_count
        dedup_total = dedup_total + dedup_bytes     # number of bytes
        row = row + 1

    summary_worksheet.write(row, 0, '{}'.format(today))
    summary_worksheet.write(row, 1, true_count, number_format)
    summary_worksheet.write(row, 2, dedup_total, number_format)
    summary_worksheet.write(row, 3, sizeof_fmt(dedup_total))
    summary_worksheet.write(row, 4, total_count, number_format)
    summary_worksheet.write(row, 5, running_total, number_format)
    summary_worksheet.write(row, 6, sizeof_fmt(running_total))
    if argv.s3_check:
        summary_worksheet.write(row, 7, s3_count, number_format)
        summary_worksheet.write(row, 8, s3_bytes, number_format)
        summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes))
    summary_workbook.close()
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            force=dict(required=False, default='no', type='bool'),
            policy=dict(required=False, default=None, type='json'),
            name=dict(required=True, type='str'),
            requester_pays=dict(default='no', type='bool'),
            s3_url=dict(aliases=['S3_URL'], type='str'),
            state=dict(default='present', type='str',
                       choices=['present', 'absent']),
            tags=dict(required=False, default=None, type='dict'),
            versioning=dict(default=None, type='bool'),
            ceph=dict(default='no', type='bool')
        )
    )

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    s3_url = module.params.get('s3_url')

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    ceph = module.params.get('ceph')

    if ceph and not s3_url:
        module.fail_json(msg='ceph flavour requires s3_url')

    flavour = 'aws'

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See
    # https://github.com/boto/boto/issues/2836 for more details.
    aws_connect_params['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if s3_url and ceph:
            ceph = urlparse.urlparse(s3_url)
            connection = boto.connect_s3(
                host=ceph.hostname,
                port=ceph.port,
                is_secure=ceph.scheme == 'https',
                **aws_connect_params
            )
            flavour = 'ceph'
        elif is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            connection = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                **aws_connect_params
            )
        elif is_walrus(s3_url):
            del aws_connect_params['calling_format']
            walrus = urlparse.urlparse(s3_url).hostname
            connection = boto.connect_walrus(walrus, **aws_connect_params)
        else:
            connection = boto.s3.connect_to_region(location, is_secure=True,
                                                   **aws_connect_params)
            # use this as fallback because connect_to_region seems to fail
            # in boto + non 'classic' aws accounts in some cases
            if connection is None:
                connection = boto.connect_s3(**aws_connect_params)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
    except Exception as e:
        module.fail_json(msg='Failed to connect to S3: %s' % str(e))

    if connection is None:  # this should never happen
        module.fail_json(msg='Unknown error, failed to create s3 connection, no information from boto.')

    state = module.params.get("state")

    if state == 'present':
        create_or_update_bucket(connection, module, location, flavour=flavour)
    elif state == 'absent':
        destroy_bucket(connection, module, flavour=flavour)
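# The boto issue referenced in the comment above is easy to reproduce: with
# the default subdomain calling format, a dotted bucket name becomes a
# multi-level subdomain of s3.amazonaws.com, and the wildcard TLS
# certificate (*.s3.amazonaws.com) only covers one label, so HTTPS
# certificate validation fails. A hedged sketch of the workaround, using a
# hypothetical bucket name:
import boto
from boto.s3.connection import OrdinaryCallingFormat

# "my.dotted.bucket" is illustrative only. Subdomain addressing would
# contact my.dotted.bucket.s3.amazonaws.com and fail TLS validation;
# path-style addressing keeps the host as s3.amazonaws.com.
conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
bucket = conn.get_bucket('my.dotted.bucket')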
def s3_save(files_to_write, capture_record):
    """Write a sketch, scrape, and html file to S3."""
    db.session.add(capture_record)
    # These are the content-types for the files S3 will be serving up
    response_types = {
        'sketch': 'image/png',
        'scrape': 'text/plain',
        'html': 'text/html'
    }

    # Iterate through each file we need to write to s3
    for capture_type, file_name in files_to_write.items():
        # Connect to S3, generate Key, set path based on capture_type,
        # write file to S3
        conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
        key = Key(conn.get_bucket(app.config.get('S3_BUCKET_PREFIX')))
        path = "sketchy/{}/{}".format(capture_type, capture_record.id)
        key.key = path
        key.set_contents_from_filename(
            app.config['LOCAL_STORAGE_FOLDER'] + '/' + file_name)

        # Generate a URL for downloading the files
        url = conn.generate_url(
            app.config.get('S3_LINK_EXPIRATION'), 'GET',
            bucket=app.config.get('S3_BUCKET_PREFIX'),
            key=key.key,
            response_headers={
                'response-content-type': response_types[capture_type],
                'response-content-disposition':
                    'attachment; filename=' + file_name
            })

        # Store the appropriate url based on capture_type
        if capture_type == 'sketch':
            capture_record.sketch_url = str(url)
        if capture_type == 'scrape':
            capture_record.scrape_url = str(url)
        if capture_type == 'html':
            capture_record.html_url = str(url)

    # Remove local files if we are saving to S3
    os.remove(os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                           files_to_write['sketch']))
    os.remove(os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                           files_to_write['scrape']))
    os.remove(os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                           files_to_write['html']))

    # If we don't have a finisher (callback) we are done
    if capture_record.callback:
        capture_record.capture_status = 'S3_ITEMS_SAVED'
    else:
        capture_record.capture_status = 'S3_ITEMS_SAVED'
        capture_record.job_status = 'COMPLETED'
    db.session.commit()
def __init__(self, connection, bucket_name):
    self.connection = connection
    if bucket_name != bucket_name.lower():
        # Upper case characters are not valid subdomain labels under the
        # default calling format, so fall back to path-style addressing.
        self.connection.calling_format = OrdinaryCallingFormat()
    self.bucket = self.connection.get_bucket(bucket_name, validate=False)
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            bucket=dict(required=True),
            object=dict(),
            src=dict(),
            dest=dict(default=None),
            mode=dict(
                choices=['get', 'put', 'delete', 'create', 'geturl', 'getstr'],
                required=True),
            expiry=dict(default=600, aliases=['expiration']),
            s3_url=dict(aliases=['S3_URL']),
            overwrite=dict(aliases=['force'], default=True, type='bool'),
            metadata=dict(type='dict'),
        ),
    )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    obj = module.params.get('object')
    src = module.params.get('src')
    if module.params.get('dest'):
        dest = os.path.expanduser(module.params.get('dest'))
    mode = module.params.get('mode')
    expiry = int(module.params['expiry'])
    s3_url = module.params.get('s3_url')
    overwrite = module.params.get('overwrite')
    metadata = module.params.get('metadata')

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = os.path.expanduser(module.params['object'])

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            s3 = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs)
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
        else:
            s3 = boto.s3.connect_to_region(
                location, is_secure=True,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs)
            # use this as fallback because connect_to_region seems to fail
            # in boto + non 'classic' aws accounts in some cases
            if s3 is None:
                s3 = boto.connect_s3(**aws_connect_kwargs)
    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            bucket=dict(required=True),
            dest=dict(default=None),
            encrypt=dict(default=True, type='bool'),
            expiry=dict(default=600, aliases=['expiration']),
            headers=dict(type='dict'),
            marker=dict(default=None),
            max_keys=dict(default=1000),
            metadata=dict(type='dict'),
            mode=dict(choices=['get', 'put', 'delete', 'create', 'geturl',
                               'getstr', 'delobj', 'list'], required=True),
            object=dict(),
            permission=dict(type='list', default=['private']),
            version=dict(default=None),
            overwrite=dict(aliases=['force'], default='always'),
            prefix=dict(default=None),
            retries=dict(aliases=['retry'], type='int', default=0),
            s3_url=dict(aliases=['S3_URL']),
            src=dict(),
        ),
    )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    encrypt = module.params.get('encrypt')
    expiry = int(module.params['expiry'])
    if module.params.get('dest'):
        dest = os.path.expanduser(module.params.get('dest'))
    headers = module.params.get('headers')
    marker = module.params.get('marker')
    max_keys = module.params.get('max_keys')
    metadata = module.params.get('metadata')
    mode = module.params.get('mode')
    obj = module.params.get('object')
    version = module.params.get('version')
    overwrite = module.params.get('overwrite')
    prefix = module.params.get('prefix')
    retries = module.params.get('retries')
    s3_url = module.params.get('s3_url')
    src = module.params.get('src')

    for acl in module.params.get('permission'):
        if acl not in CannedACLStrings:
            module.fail_json(msg='Unknown permission specified: %s' % str(acl))

    if overwrite not in ['always', 'never', 'different']:
        if module.boolean(overwrite):
            overwrite = 'always'
        else:
            overwrite = 'never'

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = os.path.expanduser(module.params['object'])

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See
    # https://github.com/boto/boto/issues/2836 for more details.
    if '.' in bucket:
        aws_connect_kwargs['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            s3 = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs)
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
        else:
            aws_connect_kwargs['is_secure'] = True
            s3 = connect_to_aws(boto.s3, location, **aws_connect_kwargs)
            # use this as fallback because connect_to_region seems to fail
            # in boto + non 'classic' aws accounts in some cases
            if s3 is None:
                s3 = boto.connect_s3(**aws_connect_kwargs)
    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            name=dict(required=True, type='str'),
            expiration_days=dict(default=None, required=False, type='int'),
            expiration_date=dict(default=None, required=False, type='str'),
            prefix=dict(default=None, required=False),
            requester_pays=dict(default='no', type='bool'),
            rule_id=dict(required=False, type='str'),
            state=dict(default='present', choices=['present', 'absent']),
            status=dict(default='enabled', choices=['enabled', 'disabled']),
            storage_class=dict(default='glacier', type='str',
                               choices=['glacier', 'standard_ia']),
            transition_days=dict(default=None, required=False, type='int'),
            transition_date=dict(default=None, required=False, type='str')
        )
    )

    module = AnsibleModule(argument_spec=argument_spec,
                           mutually_exclusive=[
                               ['expiration_days', 'expiration_date'],
                               ['expiration_days', 'transition_date'],
                               ['transition_days', 'transition_date'],
                               ['transition_days', 'expiration_date']
                           ])

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    if not HAS_DATEUTIL:
        module.fail_json(msg='dateutil required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    try:
        connection = boto.s3.connect_to_region(
            location, is_secure=True,
            calling_format=OrdinaryCallingFormat(),
            **aws_connect_params)
        # use this as fallback because connect_to_region seems to fail
        # in boto + non 'classic' aws accounts in some cases
        if connection is None:
            connection = boto.connect_s3(**aws_connect_params)
    except (boto.exception.NoAuthHandlerFound, AnsibleAWSError) as e:
        module.fail_json(msg=str(e))

    expiration_date = module.params.get("expiration_date")
    transition_date = module.params.get("transition_date")
    state = module.params.get("state")
    storage_class = module.params.get("storage_class")

    # If expiration_date set, check string is valid
    if expiration_date is not None:
        try:
            datetime.datetime.strptime(expiration_date,
                                       "%Y-%m-%dT%H:%M:%S.000Z")
        except ValueError as e:
            module.fail_json(msg="expiration_date is not a valid ISO-8601 format. The time must be midnight and a timezone of GMT must be included")

    if transition_date is not None:
        try:
            datetime.datetime.strptime(transition_date,
                                       "%Y-%m-%dT%H:%M:%S.000Z")
        except ValueError as e:
            module.fail_json(msg="transition_date is not a valid ISO-8601 format. The time must be midnight and a timezone of GMT must be included")

    boto_required_version = (2, 40, 0)
    if storage_class == 'standard_ia' and tuple(map(int, (boto.__version__.split(".")))) < boto_required_version:
        module.fail_json(msg="'standard_ia' class requires boto >= 2.40.0")

    if state == 'present':
        create_lifecycle_rule(connection, module)
    elif state == 'absent':
        destroy_lifecycle_rule(connection, module)
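# For reference, a value that passes the strptime checks above, assuming the
# shape the module requires (midnight GMT, literal ".000Z" millisecond
# field):
import datetime

valid = "2018-06-01T00:00:00.000Z"  # hypothetical date, accepted
datetime.datetime.strptime(valid, "%Y-%m-%dT%H:%M:%S.000Z")  # parses cleanly

# An offset-style timezone would be rejected by this check:
# "2018-06-01T00:00:00+00:00" raises ValueError against the same format.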
def main(src, dest, num_processes=2, split=32, force=False, verbose=False,
         quiet=False, secure=True, max_tries=5):
    # Check that src is a valid S3 url
    split_rs = urlparse.urlsplit(src)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % src)

    # Check that dest does not exist
    if os.path.isdir(dest):
        filename = split_rs.path.split('/')[-1]
        dest = os.path.join(dest, filename)
    if os.path.exists(dest):
        if force:
            os.remove(dest)
        else:
            raise ValueError("Destination file '%s' exists, specify -f to"
                             " overwrite" % dest)

    # Split out the bucket and the key
    s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    s3.is_secure = secure
    logger.debug("split_rs: %s" % str(split_rs))
    bucket = s3.lookup(split_rs.netloc)
    if bucket is None:
        raise ValueError("'%s' is not a valid bucket" % split_rs.netloc)
    key = bucket.get_key(split_rs.path)
    if key is None:
        raise ValueError("'%s' does not exist." % split_rs.path)

    # Determine the total size and calculate byte ranges
    resp = s3.make_request("HEAD", bucket=bucket, key=key)
    if resp is None:
        raise ValueError("response is invalid.")

    size = int(resp.getheader("content-length"))
    logger.debug("Got headers: %s" % resp.getheaders())

    # Skipping multipart if file is less than 1mb
    if size < 1024 * 1024:
        t1 = time.time()
        key.get_contents_to_filename(dest)
        t2 = time.time() - t1
        size_mb = size / 1024 / 1024
        logger.info(
            "Finished single-part download of %0.2fM in %0.2fs (%0.2fMBps)" %
            (size_mb, t2, size_mb / t2))
    else:
        # Touch the file
        fd = os.open(dest, os.O_CREAT)
        os.close(fd)

        size_mb = size / 1024 / 1024
        num_parts = (size_mb + (-size_mb % split)) // split

        def arg_iterator(num_parts):
            for min_byte, max_byte in gen_byte_ranges(size, num_parts):
                yield (bucket.name, key.name, dest, min_byte, max_byte,
                       split, secure, max_tries, 0)

        s = size / 1024 / 1024.
        try:
            t1 = time.time()
            pool = Pool(processes=num_processes)
            pool.map_async(do_part_download,
                           arg_iterator(num_parts)).get(9999999)
            t2 = time.time() - t1
            logger.info("Finished downloading %0.2fM in %0.2fs (%0.2fMBps)" %
                        (s, t2, s / t2))
        except KeyboardInterrupt:
            logger.warning("User terminated")
        except Exception as err:
            logger.error(err)
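# gen_byte_ranges is referenced above but not shown. A minimal sketch of
# what it presumably does - evenly sized, inclusive byte ranges covering the
# whole object - written to match how arg_iterator consumes it. This is a
# hypothetical reconstruction; the real helper may differ.
def gen_byte_ranges(size, num_parts):
    part_size = -(-size // num_parts)  # ceiling division
    for i in range(num_parts):
        min_byte = i * part_size
        max_byte = min(size - 1, (i + 1) * part_size - 1)
        if min_byte <= max_byte:
            # Inclusive range, suitable for an HTTP Range request.
            yield (min_byte, max_byte)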
def s3_connection(self):
    """
    Connect to the Amazon S3 API.

    If the connection attempt fails because Boto can't find credentials
    the attempt is retried once with an anonymous connection.

    Called on demand by :py:attr:`s3_bucket`.

    :returns: A :py:class:`boto.s3.connection.S3Connection` object.
    :raises: :py:exc:`.CacheBackendError` when the connection to the
             Amazon S3 API fails.
    """
    if not hasattr(self, 'cached_connection'):
        import boto
        from boto.exception import BotoClientError, BotoServerError, NoAuthHandlerFound
        from boto.s3.connection import S3Connection, SubdomainCallingFormat, OrdinaryCallingFormat
        try:
            # Configure the number of retries and the socket timeout used
            # by Boto. Based on the snippet given in the following email:
            # https://groups.google.com/d/msg/boto-users/0osmP0cUl5Y/X4NdlMGWKiEJ
            if not boto.config.has_section(BOTO_CONFIG_SECTION):
                boto.config.add_section(BOTO_CONFIG_SECTION)
            boto.config.set(BOTO_CONFIG_SECTION,
                            BOTO_CONFIG_NUM_RETRIES_OPTION,
                            str(self.config.s3_cache_retries))
            boto.config.set(BOTO_CONFIG_SECTION,
                            BOTO_CONFIG_SOCKET_TIMEOUT_OPTION,
                            str(self.config.s3_cache_timeout))
            logger.debug("Connecting to Amazon S3 API ..")
            endpoint = urlparse(self.config.s3_cache_url)
            host, _, port = endpoint.netloc.partition(':')
            is_secure = (endpoint.scheme == 'https')
            calling_format = (SubdomainCallingFormat()
                              if host == S3Connection.DefaultHost
                              else OrdinaryCallingFormat())
            try:
                self.cached_connection = S3Connection(
                    host=host,
                    port=int(port) if port else None,
                    is_secure=is_secure,
                    calling_format=calling_format)
            except NoAuthHandlerFound:
                logger.debug("Amazon S3 API credentials missing, "
                             "retrying with anonymous connection ..")
                self.cached_connection = S3Connection(
                    host=host,
                    port=int(port) if port else None,
                    is_secure=is_secure,
                    calling_format=calling_format,
                    anon=True)
        except (BotoClientError, BotoServerError):
            raise CacheBackendError("""
                Failed to connect to the Amazon S3 API! Most likely your
                credentials are not correctly configured. The Amazon S3
                cache backend will be disabled for now.
            """)
    return self.cached_connection
def upload_to_s3(aws_access_key_id, aws_secret_access_key, file, bucket, key,
                 callback=None, md5=None, reduced_redundancy=False,
                 content_type=None):
    """
    Uploads the given file to the AWS S3 bucket and key specified.

    callback is a function of the form:

        def callback(complete, total)

    The callback should accept two integer parameters: the number of
    bytes successfully transmitted to S3 so far, and the total size of
    the object being transmitted.

    Returns a boolean indicating the success/failure of the upload.
    """
    try:
        size = os.fstat(file.fileno()).st_size
    except Exception as e:
        info(e)
        # Not all file objects implement fileno(),
        # so we fall back on this
        file.seek(0, os.SEEK_END)
        size = file.tell()

    conn = boto.connect_s3(aws_access_key_id, aws_secret_access_key,
                           is_secure=False,
                           calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket(bucket, validate=True)
    k = Key(bucket)
    k.key = key
    if content_type:
        k.set_metadata('Content-Type', content_type)
    sent = k.set_contents_from_file(file, cb=callback, md5=md5,
                                    reduced_redundancy=reduced_redundancy,
                                    rewind=True)

    # Rewind for later use
    file.seek(0)

    if sent == size:
        return True
    return False
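# A small usage sketch of the callback contract described in the docstring.
# The credentials, file path, and bucket name below are hypothetical.
def print_progress(complete, total):
    # boto invokes this periodically during the transfer.
    print "%d of %d bytes uploaded" % (complete, total)


with open('backup.tar.gz', 'rb') as f:
    ok = upload_to_s3('AKIA...', 'secret-key', f,
                      'my-bucket', 'backups/backup.tar.gz',
                      callback=print_progress,
                      content_type='application/gzip')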
# ==========
# = Celery =
# ==========

# celeryapp.autodiscover_tasks(INSTALLED_APPS)
CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml']

# ==========
# = Assets =
# ==========

JAMMIT = jammit.JammitAssets(NEWSBLUR_DIR)

if DEBUG:
    MIDDLEWARE_CLASSES += ('utils.request_introspection_middleware.DumpRequestMiddleware',)
    MIDDLEWARE_CLASSES += ('utils.exception_middleware.ConsoleExceptionMiddleware',)

# =======
# = AWS =
# =======

S3_CONN = None
if BACKED_BY_AWS.get('pages_on_s3') or BACKED_BY_AWS.get('icons_on_s3'):
    S3_CONN = S3Connection(S3_ACCESS_KEY, S3_SECRET,
                           calling_format=OrdinaryCallingFormat())
    # if BACKED_BY_AWS.get('pages_on_s3'):
    #     S3_PAGES_BUCKET = S3_CONN.get_bucket(S3_PAGES_BUCKET_NAME)
    # if BACKED_BY_AWS.get('icons_on_s3'):
    #     S3_ICONS_BUCKET = S3_CONN.get_bucket(S3_ICONS_BUCKET_NAME)

# Allow literal dots, underscores and hyphens in hostnames; '-' is kept at
# the end of the character class so it is not treated as a range.
django.http.request.host_validation_re = re.compile(
    r"^([a-z0-9._-]+|\[[a-f0-9]*:[a-f0-9:]+\])(:\d+)?$")
def s3_delete(aws_access_key_id, aws_secret_access_key, bucket, key):
    try:
        conn = boto.connect_s3(aws_access_key_id, aws_secret_access_key,
                               is_secure=False,
                               calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket(bucket, validate=True)
        k = Key(bucket=bucket, name=key)
        if k.exists():
            k.delete()
        # Report success only if the key is actually gone.
        return not k.exists()
    except Exception as e:
        info(e)
        return False
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            bucket=dict(required=True),
            dest=dict(default=None, type='path'),
            encrypt=dict(default=True, type='bool'),
            expiry=dict(default=600, aliases=['expiration']),
            headers=dict(type='dict'),
            marker=dict(default=None),
            max_keys=dict(default=1000),
            metadata=dict(type='dict'),
            mode=dict(choices=['get', 'put', 'delete', 'create', 'geturl',
                               'getstr', 'delobj', 'list'], required=True),
            object=dict(type='path'),
            permission=dict(type='list', default=['private']),
            version=dict(default=None),
            overwrite=dict(aliases=['force'], default='always'),
            prefix=dict(default=None),
            retries=dict(aliases=['retry'], type='int', default=0),
            s3_url=dict(aliases=['S3_URL']),
            rgw=dict(default='no', type='bool'),
            src=dict(),
        ),
    )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    encrypt = module.params.get('encrypt')
    expiry = int(module.params['expiry'])
    if module.params.get('dest'):
        dest = module.params.get('dest')
    headers = module.params.get('headers')
    marker = module.params.get('marker')
    max_keys = module.params.get('max_keys')
    metadata = module.params.get('metadata')
    mode = module.params.get('mode')
    obj = module.params.get('object')
    version = module.params.get('version')
    overwrite = module.params.get('overwrite')
    prefix = module.params.get('prefix')
    retries = module.params.get('retries')
    s3_url = module.params.get('s3_url')
    rgw = module.params.get('rgw')
    src = module.params.get('src')

    for acl in module.params.get('permission'):
        if acl not in CannedACLStrings:
            module.fail_json(msg='Unknown permission specified: %s' % str(acl))

    if overwrite not in ['always', 'never', 'different']:
        if module.boolean(overwrite):
            overwrite = 'always'
        else:
            overwrite = 'never'

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = module.params['object']

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # rgw requires an explicit url
    if rgw and not s3_url:
        module.fail_json(msg='rgw flavour requires s3_url')

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See
    # https://github.com/boto/boto/issues/2836 for more details.
    if '.' in bucket:
        aws_connect_kwargs['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to RGW, Walrus or fakes3
    try:
        s3 = get_s3_connection(aws_connect_kwargs, location, rgw, s3_url)
    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
    except Exception as e:
        module.fail_json(msg='Failed to connect to S3: %s' % str(e))

    if s3 is None:  # this should never happen
        module.fail_json(msg='Unknown error, failed to create s3 connection, no information from boto.')

    # If our mode is a GET operation (download), go through the procedure as appropriate ...
    if mode == 'get':
        # First, we check to see if the bucket exists, we get "bucket" returned.
        bucketrtn = bucket_check(module, s3, bucket)
        if bucketrtn is False:
            module.fail_json(msg="Source bucket cannot be found", failed=True)

        # Next, we check to see if the key in the bucket exists. If it exists, it also returns key_matches md5sum check.
        keyrtn = key_check(module, s3, bucket, obj, version=version)
        if keyrtn is False:
            if version is not None:
                module.fail_json(msg="Key %s with version id %s does not exist." % (obj, version), failed=True)
            else:
                module.fail_json(msg="Key %s does not exist." % obj, failed=True)

        # If the destination path doesn't exist or overwrite is 'always', no need to do the md5sum etag check, so just download.
        pathrtn = path_check(dest)
        if pathrtn is False or overwrite == 'always':
            download_s3file(module, s3, bucket, obj, dest, retries, version=version)

        # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
        if pathrtn is True:
            md5_remote = keysum(module, s3, bucket, obj, version=version)
            md5_local = module.md5(dest)
            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    download_s3file(module, s3, bucket, obj, dest, retries, version=version)
                else:
                    module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite=always parameter to force.", changed=False)
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    download_s3file(module, s3, bucket, obj, dest, retries, version=version)
                else:
                    module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.")

            # If the checksums match and overwrite is 'never', we exit with a helpful message.
            if sum_matches is True and overwrite == 'never':
                module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)

    # if our mode is a PUT operation (upload), go through the procedure as appropriate ...
    if mode == 'put':
        # Use this snippet to debug through conditionals:
        # module.exit_json(msg="Bucket return %s" % bucketrtn)

        # Let's check the src path.
        pathrtn = path_check(src)
        if pathrtn is False:
            module.fail_json(msg="Local object for PUT does not exist", failed=True)

        # Let's check to see if bucket exists to get ground truth.
        bucketrtn = bucket_check(module, s3, bucket)
        if bucketrtn is True:
            keyrtn = key_check(module, s3, bucket, obj)

        # Let's check key state. Does it exist and if it does, compute the etag md5sum.
        if bucketrtn is True and keyrtn is True:
            md5_remote = keysum(module, s3, bucket, obj)
            md5_local = module.md5(src)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)
                else:
                    get_download_url(module, s3, bucket, obj, expiry, changed=False)
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)
                else:
                    module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force upload.")

        # If neither exist (based on bucket existence), we can create both.
        if bucketrtn is False and pathrtn is True:
            create_bucket(module, s3, bucket, location)
            upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)

        # If bucket exists but key doesn't, just upload.
        if bucketrtn is True and pathrtn is True and keyrtn is False:
            upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)

    # Delete an object from a bucket, not the entire bucket
    if mode == 'delobj':
        if obj is None:
            module.fail_json(msg="object parameter is required", failed=True)
        if bucket:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                deletertn = delete_key(module, s3, bucket, obj)
                if deletertn is True:
                    module.exit_json(msg="Object %s deleted from bucket %s." % (obj, bucket), changed=True)
            else:
                module.fail_json(msg="Bucket does not exist.", changed=False)
        else:
            module.fail_json(msg="Bucket parameter is required.", failed=True)

    # Delete an entire bucket, including all objects in the bucket
    if mode == 'delete':
        if bucket:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                deletertn = delete_bucket(module, s3, bucket)
                if deletertn is True:
                    module.exit_json(msg="Bucket %s and all keys have been deleted." % bucket, changed=True)
            else:
                module.fail_json(msg="Bucket does not exist.", changed=False)
        else:
            module.fail_json(msg="Bucket parameter is required.", failed=True)

    # Support for listing a set of keys
    if mode == 'list':
        bucket_object = get_bucket(module, s3, bucket)

        # If the bucket does not exist then bail out
        if bucket_object is None:
            module.fail_json(msg="Target bucket (%s) cannot be found" % bucket, failed=True)

        list_keys(module, bucket_object, prefix, marker, max_keys)

    # Need to research how to create directories without "populating" a key, so this should just do bucket creation for now.
    # WE SHOULD ENABLE SOME WAY OF CREATING AN EMPTY KEY TO CREATE "DIRECTORY" STRUCTURE, AWS CONSOLE DOES THIS.
    if mode == 'create':
        if bucket and not obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                module.exit_json(msg="Bucket already exists.", changed=False)
            else:
                module.exit_json(msg="Bucket created successfully", changed=create_bucket(module, s3, bucket, location))
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if obj.endswith('/'):
                dirobj = obj
            else:
                dirobj = obj + "/"
            if bucketrtn is True:
                keyrtn = key_check(module, s3, bucket, dirobj)
                if keyrtn is True:
                    module.exit_json(msg="Bucket %s and key %s already exists." % (bucket, obj), changed=False)
                else:
                    create_dirkey(module, s3, bucket, dirobj)
            if bucketrtn is False:
                created = create_bucket(module, s3, bucket, location)
                create_dirkey(module, s3, bucket, dirobj)

    # Support for grabbing the time-expired URL for an object in S3/Walrus.
    if mode == 'geturl':
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is False:
                module.fail_json(msg="Bucket %s does not exist." % bucket, failed=True)
            else:
                keyrtn = key_check(module, s3, bucket, obj)
                if keyrtn is True:
                    get_download_url(module, s3, bucket, obj, expiry)
                else:
                    module.fail_json(msg="Key %s does not exist." % obj, failed=True)
        else:
            module.fail_json(msg="Bucket and Object parameters must be set", failed=True)

    if mode == 'getstr':
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is False:
                module.fail_json(msg="Bucket %s does not exist." % bucket, failed=True)
            else:
                keyrtn = key_check(module, s3, bucket, obj, version=version)
                if keyrtn is True:
                    download_s3str(module, s3, bucket, obj, version=version)
                else:
                    if version is not None:
                        module.fail_json(msg="Key %s with version id %s does not exist." % (obj, version), failed=True)
                    else:
                        module.fail_json(msg="Key %s does not exist." % obj, failed=True)

    module.exit_json(failed=False)
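# One caveat about the md5_remote/md5_local comparison used in the get and
# put branches above: S3 ETags are plain MD5 sums only for single-part
# uploads. Multipart uploads produce ETags of the form
# <md5-of-part-md5s>-<part count>, so the equality check can report a false
# mismatch. A minimal guard (hypothetical helper, not part of the module):
def etag_is_plain_md5(etag):
    # Multipart ETags look like "9b2cf535f27731c974343645a3985328-5";
    # only a bare 32-hex-digit ETag is a usable MD5 checksum.
    etag = etag.strip('"')
    return len(etag) == 32 and '-' not in etag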
def get_bucket_server(self, server, bucket):
    if not bucket:
        return OrdinaryCallingFormat.get_bucket_server(self, server, bucket)
    return 's3-%s.amazonaws.com' % (AWS_STORAGE_BUCKET_REGION,)
def handle(self, args):
    s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET,
                             calling_format=OrdinaryCallingFormat())
    bucket = s3conn.create_bucket(settings.AWS_BUCKET)

    for abbr in args.abbrs:
        meta = db.metadata.find_one({'_id': abbr.lower()})
        if not meta:
            log.critical("'{0}' does not exist in the database.".format(abbr))
            sys.exit(1)
        else:
            log.info("Downloading photos for {0}".format(abbr))

        orig_dir = 'photos/original'
        xsmall_dir = 'photos/xsmall'
        small_dir = 'photos/small'
        large_dir = 'photos/large'
        for d in (orig_dir, xsmall_dir, small_dir, large_dir):
            if not os.path.exists(d):
                os.makedirs(d)

        for leg in db.legislators.find(
                {settings.LEVEL_FIELD: abbr,
                 'photo_url': {'$exists': True}}, timeout=False):
            fname = os.path.join(orig_dir, '{0}.jpg'.format(leg['_id']))

            # if fname already exists, skip this processing step
            if os.path.exists(fname):
                continue

            # error retrieving photo, skip it
            try:
                tmpname, resp = scraper.urlretrieve(leg['photo_url'])
            except Exception as e:
                log.critical('error fetching %s: %s', leg['photo_url'], e)
                continue

            try:
                # original size, standardized filenames
                fname = os.path.join(orig_dir, '{0}.jpg'.format(leg['_id']))
                subprocess.check_call(['convert', tmpname, fname])
                _upload(fname, bucket)

                # xsmall - 50x75
                fname = os.path.join(xsmall_dir, '{0}.jpg'.format(leg['_id']))
                subprocess.check_call(
                    ['convert', tmpname, '-resize', '50x75', fname])
                _upload(fname, bucket)

                # small - 150x200
                fname = os.path.join(small_dir, '{0}.jpg'.format(leg['_id']))
                subprocess.check_call(
                    ['convert', tmpname, '-resize', '150x200', fname])
                _upload(fname, bucket)
            except subprocess.CalledProcessError:
                print('convert failed for ', fname)