def __init__(self, aws_region=None, aws_access_key_id=None, aws_secret_access_key=None,
             aws_s3_bucket_name=None, aws_s3_bucket_auth=True, aws_s3_max_age_seconds=None,
             cache_authenticated=False):
    self.aws_region = aws_region or settings.AWS_REGION
    self.aws_access_key_id = aws_access_key_id or settings.AWS_ACCESS_KEY_ID
    self.aws_secret_access_key = aws_secret_access_key or settings.AWS_SECRET_ACCESS_KEY
    self.aws_s3_bucket_name = aws_s3_bucket_name or settings.AWS_S3_BUCKET_NAME
    self.aws_s3_bucket_auth = aws_s3_bucket_auth
    self.aws_s3_max_age_seconds = aws_s3_max_age_seconds or settings.AWS_S3_MAX_AGE_SECONDS
    self.cache_authenticated = cache_authenticated or settings.CACHE_AUTHENTICATED_URL
    # Try to connect to S3 without using aws_access_key_id and aws_secret_access_key
    # if those are not specified, else use the given id and secret.
    if self.aws_access_key_id == "" and self.aws_secret_access_key == "":
        self.s3_connection = s3.connect_to_region(
            self.aws_region,
            calling_format="boto.s3.connection.OrdinaryCallingFormat",
        )
    else:
        self.s3_connection = s3.connect_to_region(
            self.aws_region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            calling_format="boto.s3.connection.OrdinaryCallingFormat",
        )
    self.bucket = self.s3_connection.get_bucket(self.aws_s3_bucket_name)
    # All done!
    super(S3Storage, self).__init__()
def new_environment(options):
    sns_conn = sns.connect_to_region(options.region)
    s3_conn = s3.connect_to_region(options.region)
    ec2_conn = ec2.connect_to_region(options.region)
    consul_conn = Consul(options.host, options.port)
    environment_service = EnvironmentService(ec2_conn, s3_conn, sns_conn, consul_conn)
    return Environment(environment_service, options)
def upload_to_s3_bucket(aws_access_key, aws_secret_key, s3_bucket_name, path):
    s3_connection = S3Connection(aws_access_key, aws_secret_key)
    bucket = s3_connection.get_bucket(s3_bucket_name)
    # Workaround for boto issue #2207 as per anna-buttfield-sirca
    # at https://github.com/boto/boto/issues/2207#issuecomment-60682869
    bucket_location = bucket.get_location()
    if bucket_location:
        s3_connection = connect_to_region(
            bucket_location,
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key
        )
        bucket = s3_connection.get_bucket(s3_bucket_name)

    print("Deleting existing content")
    for key in bucket.list():
        key.delete()

    print("Uploading new content")
    for (source_path, directories, files) in os.walk(path):
        assert source_path.startswith(path)
        dest_path = source_path[len(path):]
        for filename in files:
            if filename == ".gitignore":
                print("Skipping .gitignore")
                continue
            print("Uploading {} from {} to {}".format(filename, source_path, dest_path))
            if dest_path.startswith(".git"):
                print("It's in a .git* directory, skipping")
                continue
            key = Key(bucket)
            key.key = os.path.join(dest_path, filename)
            key.set_contents_from_filename(os.path.join(source_path, filename))
def __init__(self, region, name=None, app_type=None, platform=None, env=None, old_method=False):
    self.old_method = old_method
    self.name = name
    self.app_type = app_type
    self.platform = platform
    self.env = env
    self.region = region
    self.connection = s3.connect_to_region(region)
    self.bucket = self.connection.get_bucket(self.bucket_name)
    if self.old_method:
        dc = "aws1" if self.region == "sa-east-1" else "aws2"
        self.generic_hostname = "%sn.%s.%sn.%s" % (self.name, self.platform, dc, self.env)
        self.get_bootstrap = "curl -H'Referer: %s' %s/scripts/tgbootstrap-old.py > /usr/local/bin/tgbootstrap.py" % (self.s3keycheck, self.s3host)
    else:
        self.generic_hostname = "%s.%s.%s.%s.%s" % (self.app_type, self.name, self.platform, self.env, self.region)
    self.node_name = "%s.titansgroup.net.json" % (self.generic_hostname)
    self.node = self.bucket.get_key("chef/nodes/%s" % self.node_name)
    if self.node is not None:
        self.get_chef_node = "curl -H'Referer: %s' %s/chef/nodes/%s > /tmp/node.json" % (self.s3keycheck, self.s3host, self.node_name)
    else:
        raise ValueError("The node (%s) was not found on S3, upload it." % self.node_name)
def __init__(self, bucket_name, region, aws_access_key_id, aws_secret_access_key):
    """
    Constructor

    :param bucket_name: Name of the bucket that the files are on.
    :param region: Region that the bucket is located in.
    :param aws_access_key_id: AWS Access Key ID
    :param aws_secret_access_key: AWS Secret Access Key

    Make sure the credentials have the correct permissions set up on
    Amazon, or else S3 will return a 403 FORBIDDEN error.
    """
    if not s3:
        raise ValueError('Could not import boto. You can install boto by '
                         'using pip install boto')

    connection = s3.connect_to_region(
        region,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    self.bucket = connection.get_bucket(bucket_name)
    self.separator = '/'
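# A minimal usage sketch for the constructor above. The class name
# (S3FileStorage), the bucket name, and the credential values are assumptions
# for illustration only; substitute the real class and credentials.
storage = S3FileStorage(
    bucket_name='my-example-bucket',       # hypothetical bucket
    region='us-east-1',
    aws_access_key_id='AKIA...',           # placeholder, not a real key
    aws_secret_access_key='...',           # placeholder
)
key = storage.bucket.get_key('path/to/file.txt')   # boto returns None if the key is missing
if key is not None:
    print(key.get_contents_as_string())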
def write_fastly_summary(**kwargs): """ Output a summary of all fastly distributions on stdout, and optionally write it as an HTML file to S3 """ services = get_all_services(kwargs['fastly_api_key']) stats = get_statistics(kwargs['fastly_api_key']) table = PrettyTable(["Service", "Hit Ratio", "Bandwidth", "Data", "Requests", "% 20x", "% 30x", "% 40x", "% 50x"]) services_data = [] for service in services: service_data = {'name': service, 'hit_ratio': '-', 'bandwidth': '-', 'data': '-', 'requests': '-', '20x': '-', '30x': '-', '40x': '-', '50x': '-'} if services[service] in stats['data']: if stats['data'][services[service]][0]['hit_ratio']: hitrate = int(float(stats['data'][services[service]][0]['hit_ratio']) * 100) else: hitrate = None service_data['hit_ratio'] = hitrate service_data['bandwidth'] = stats['data'][services[service]][0]['bandwidth'] service_data['data'] = sizeof_fmt(stats['data'][services[service]][0]['bandwidth']) service_data['requests'] = stats['data'][services[service]][0]['requests'] service_data['20x'] = int(100 * (stats['data'][services[service]][0]['status_2xx'] / float(stats['data'][services[service]][0]['requests']))) service_data['30x'] = int(100 * (stats['data'][services[service]][0]['status_3xx'] / float(stats['data'][services[service]][0]['requests']))) service_data['40x'] = int(100 * (stats['data'][services[service]][0]['status_4xx'] / float(stats['data'][services[service]][0]['requests']))) service_data['50x'] = int(100 * (stats['data'][services[service]][0]['status_5xx'] / float(stats['data'][services[service]][0]['requests']))) else: LOGGER.info("No stats for service ID {0}".format(service)) table.add_row([service_data['name'], service_data['hit_ratio'], service_data['bandwidth'], service_data['data'], service_data['requests'], service_data['20x'], service_data['30x'], service_data['40x'], service_data['50x'] ]) services_data.append(service_data) rendered_template = TEMPLATE.render(services=services_data, generated=time.strftime('%H:%M %Z on %A %d %b %Y')) if kwargs['s3bucket']: LOGGER.info("Writing s3://{0}/{1}".format(kwargs['s3bucket'], kwargs['filename'])) conn = s3.connect_to_region('eu-west-1') bucket = conn.get_bucket(kwargs['s3bucket']) k = Key(bucket) k.key = kwargs['filename'] k.set_contents_from_string(rendered_template, policy=kwargs['s3acl'], headers={'Content-Type' : 'text/html'}) print table.get_string(sortby="Hit Ratio") print "Showing {0} services".format(len(services))
def get_bucket(app, user=None, password=None, bucket_name=None, location=None):
    user = user or app.config.get('AWS_ACCESS_KEY_ID')
    password = password or app.config.get('AWS_SECRET_ACCESS_KEY')
    bucket_name = bucket_name or app.config.get('S3_BUCKET_NAME')
    if not bucket_name:
        raise ValueError("No bucket name provided.")
    location = location or app.config.get('S3_REGION')

    # connect to s3
    if not location:
        conn = S3Connection(user, password)  # (default region)
    else:
        conn = connect_to_region(location,
                                 aws_access_key_id=user,
                                 aws_secret_access_key=password)

    # get_or_create bucket
    try:
        try:
            bucket = conn.create_bucket(bucket_name)
        except S3CreateError as e:
            if e.error_code == u'BucketAlreadyOwnedByYou':
                bucket = conn.get_bucket(bucket_name)
            else:
                raise e

        bucket.make_public(recursive=False)
    except S3CreateError as e:
        raise e
    return bucket
def __init__(self, aws_region=None, aws_access_key_id=None, aws_secret_access_key=None,
             aws_s3_bucket_name=None, aws_s3_calling_format=None, aws_s3_key_prefix=None,
             aws_s3_bucket_auth=None, aws_s3_max_age_seconds=None, aws_s3_public_url=None,
             aws_s3_reduced_redundancy=False, aws_s3_host=None, aws_s3_metadata=None):
    self.aws_region = settings.AWS_REGION if aws_region is None else aws_region
    self.aws_access_key_id = settings.AWS_ACCESS_KEY_ID if aws_access_key_id is None else aws_access_key_id
    self.aws_secret_access_key = settings.AWS_SECRET_ACCESS_KEY if aws_secret_access_key is None else aws_secret_access_key
    self.aws_s3_bucket_name = settings.AWS_S3_BUCKET_NAME if aws_s3_bucket_name is None else aws_s3_bucket_name
    self.aws_s3_calling_format = settings.AWS_S3_CALLING_FORMAT if aws_s3_calling_format is None else aws_s3_calling_format
    self.aws_s3_key_prefix = settings.AWS_S3_KEY_PREFIX if aws_s3_key_prefix is None else aws_s3_key_prefix
    self.aws_s3_bucket_auth = settings.AWS_S3_BUCKET_AUTH if aws_s3_bucket_auth is None else aws_s3_bucket_auth
    self.aws_s3_max_age_seconds = settings.AWS_S3_MAX_AGE_SECONDS if aws_s3_max_age_seconds is None else aws_s3_max_age_seconds
    self.aws_s3_public_url = settings.AWS_S3_PUBLIC_URL if aws_s3_public_url is None else aws_s3_public_url
    self.aws_s3_reduced_redundancy = settings.AWS_S3_REDUCED_REDUNDANCY if aws_s3_reduced_redundancy is None else aws_s3_reduced_redundancy
    self.aws_s3_host = settings.AWS_S3_HOST if aws_s3_host is None else aws_s3_host
    self.aws_s3_metadata = settings.AWS_S3_METADATA if aws_s3_metadata is None else aws_s3_metadata

    # Validate args.
    if self.aws_s3_public_url and self.aws_s3_bucket_auth:
        raise ImproperlyConfigured("Cannot use AWS_S3_BUCKET_AUTH with AWS_S3_PUBLIC_URL.")

    # Connect to S3.
    connection_kwargs = {
        "calling_format": self.aws_s3_calling_format,
    }
    if self.aws_access_key_id:
        connection_kwargs["aws_access_key_id"] = self.aws_access_key_id
    if self.aws_secret_access_key:
        connection_kwargs["aws_secret_access_key"] = self.aws_secret_access_key
    if self.aws_s3_host:
        connection_kwargs["host"] = self.aws_s3_host
    self.s3_connection = s3.connect_to_region(self.aws_region, **connection_kwargs)
    self.bucket = self.s3_connection.get_bucket(self.aws_s3_bucket_name, validate=False)
    # All done!
    super(S3Storage, self).__init__()
def action(self):
    region = self.get_input('Region')
    color = self.get_input('StackColor')
    hosted_zone_name = self.get_input('PublicHostedZoneName')
    app_lb_endpoint = self.get_input('AppServerLoadBalancerEndpoint')
    app_lb_hosted_zone_id = self.get_input(
        'AppServerLoadBalancerHostedZoneNameID')
    backward_compat_hosted_zone_name = self.get_input(
        'BackwardCompatPublicHostedZoneName')
    backward_compat_app_lb_endpoint = self.get_input(
        'BackwardCompatAppServerLoadBalancerEndpoint')
    backward_compat_app_lb_hosted_zone_id = self.get_input(
        'BackwardCompatAppServerLoadBalancerHostedZoneNameID')

    route53_conn = r53.connect_to_region(region,
                                         profile_name=self.aws_profile)
    public_hosted_zone = route53_conn.get_zone(hosted_zone_name)
    record_sets = r53.record.ResourceRecordSets(route53_conn,
                                                public_hosted_zone.id)
    record_sets.add_change('UPSERT', hosted_zone_name, 'A',
                           alias_hosted_zone_id=app_lb_hosted_zone_id,
                           alias_dns_name=app_lb_endpoint,
                           alias_evaluate_target_health=True,
                           identifier='Primary',
                           failover='PRIMARY')
    record_sets.commit()

    backward_compat_hosted_zone = route53_conn.get_zone(
        backward_compat_hosted_zone_name)
    backward_compat_record_sets = r53.record.ResourceRecordSets(
        route53_conn, backward_compat_hosted_zone.id)
    backward_compat_record_sets.add_change(
        'UPSERT', backward_compat_hosted_zone_name, 'A',
        alias_hosted_zone_id=backward_compat_app_lb_hosted_zone_id,
        alias_dns_name=backward_compat_app_lb_endpoint,
        alias_evaluate_target_health=True,
        identifier='Primary',
        failover='PRIMARY')
    backward_compat_record_sets.commit()

    s3_conn = s3.connect_to_region(region,
                                   profile_name=self.aws_profile,
                                   calling_format=OrdinaryCallingFormat())
    bucket = s3_conn.get_bucket('tile-cache.{}'.format(hosted_zone_name))
    rules = s3.website.RoutingRules()
    rules.add_rule(s3.website.RoutingRule(
        s3.website.Redirect(
            protocol='https',
            http_redirect_code=302,
            hostname='{}-tiles.{}'.format(color.lower(), hosted_zone_name)),
        s3.website.Condition(http_error_code=404)))
    bucket.configure_website(suffix='index.html', error_key='error.html',
                             routing_rules=rules)
def collect_output(bucket, prefix, output_file):
    s3_conn = s3.connect_to_region('us-west-2')
    bucket = s3_conn.get_bucket(bucket)
    keys = bucket.list(prefix=prefix + '/output')
    with open(output_file, 'w') as of:
        for k in keys:
            k.get_contents_to_file(of)
def get_s3_bucket():
    conn = s3.connect_to_region(
        'eu-west-2',
        aws_access_key_id=os.environ['S3_ACCESS_KEY_ID'],
        aws_secret_access_key=os.environ['S3_SECRET_ACCESS_KEY'],
        is_secure=True,
    )
    return conn.get_bucket('adammalinowski')
def connect(self):
    """Connect to AWS services."""
    key_id = self.config["aws-access-key-id"]
    secret_key = self.config["aws-secret-access-key"]
    self.connection = s3.connect_to_region(
        "eu-central-1",
        aws_access_key_id=key_id,
        aws_secret_access_key=secret_key
    )
    self.bucket = self.connection.get_bucket(self.bucket_name)
def get_s3_connection(aws_access_key_id, aws_secret_access_key, region_name):
    conn = s3.connect_to_region(aws_access_key_id=aws_access_key_id,
                                aws_secret_access_key=aws_secret_access_key,
                                region_name=region_name)
    if conn is None:
        # connect_to_region returns None when boto does not recognize the region name.
        raise Exception("Could not get an S3 connection to region {}; "
                        "check that the region name is valid.".format(region_name))
    return conn
def get_connection(host_or_region):
    # type: (str) -> connection.S3Connection
    # first case: we got a valid DNS (host)
    if "." in host_or_region:
        return connection.S3Connection(host=host_or_region)
    # second case: we got a region
    return connect_to_region(host_or_region)
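# Usage sketch for get_connection above: anything containing a dot is treated
# as an endpoint hostname, everything else as an AWS region name. The endpoint
# string below is only an example value, and credentials are assumed to come
# from the environment or boto config.
regional_conn = get_connection('eu-west-1')                    # dispatched to connect_to_region
endpoint_conn = get_connection('s3.eu-west-1.amazonaws.com')   # dispatched to S3Connection(host=...)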
def connect(self):
    """Connect to AWS services."""
    key_id = self.config['aws-access-key-id']
    secret_key = self.config['aws-secret-access-key']
    self.connection = s3.connect_to_region(self.region,
                                           aws_access_key_id=key_id,
                                           aws_secret_access_key=secret_key)
    self.bucket = self.connection.get_bucket(self.bucket_name)
def backup(): """Backup Postgres to S3 using pg_dump""" key_name = S3_KEY_NAME if not key_name.endswith("/") and key_name != "": key_name = "%s/" % key_name # add the file name date suffix now = datetime.now() FILENAME_SUFFIX = "_%(year)d%(month)02d%(day)02d_%(hour)02d%(minute)02d%(second)02d" % { 'year': now.year, 'month': now.month, 'day': now.day, 'hour': now.hour, 'minute': now.minute, 'second': now.second } FILENAME = ARCHIVE_NAME + FILENAME_SUFFIX + ".tar.gz" log.info("Preparing " + FILENAME + " from the database dump ...") # create postgres databeses dump with tempfile.NamedTemporaryFile() as t1: proc1 = subprocess.Popen(POSTGRES_DUMP_PATH, shell=True, universal_newlines=True, stdout=t1) proc1.wait() t1.flush() # create tar.gz for the above two files t2 = tempfile.NamedTemporaryFile() tar = tarfile.open(t2.name, "w|gz") tar.add(t1.name, ARCHIVE_NAME + ".sql") tar.close() log.info("Uploading the " + FILENAME + " file to Amazon S3 ...") # get bucket conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) try: bucket = conn.get_bucket(S3_BUCKET_NAME) # Uggly fix to go around bug in boto https://github.com/boto/boto/issues/2207 bucket_location = bucket.get_location() conn = connect_to_region(bucket_location, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) bucket = conn.get_bucket(S3_BUCKET_NAME) except S3ResponseError as e: log.info("Error details: {}".format(e.message)) sys.stderr.write("There is no bucket with the name \"" + S3_BUCKET_NAME + "\" in your Amazon S3 account\n") sys.stderr.write("Error: Please enter an appropriate bucket name and re-run the script\n") t2.close() return # upload file to Amazon S3 k = Key(bucket) k.key = key_name + FILENAME k.set_contents_from_filename(t2.name) t2.close() log.info("Sucessfully uploaded the archive to Amazon S3")
def __init__(self):
    super(SimpleStorageServiceIntegration, self).__init__()
    bucket_name = self.config.get_property(
        self.config.SECTION_AWS_S3, self.config.OPTION_BUCKET_NAME)
    self.logger.debug("Connecting to AWS S3")
    conn = s3.connect_to_region(self.default_region)
    self.logger.debug("Getting %s bucket" % bucket_name)
    self.bucket = conn.get_bucket(bucket_name)
def create_s3_connection(aws_key, aws_secret_key):
    # TODO: make region configurable
    region = u'eu-west-1'
    conn = connect_to_region(
        region,
        aws_access_key_id=aws_key,
        aws_secret_access_key=aws_secret_key
    )
    logger.info('Connected to S3 region: %s', conn._connection[0])
    return conn
def download_from_s3(aws_region, s3_bucket, filename):
    print 'Downloading data..'
    conn = s3.connect_to_region(aws_region,
                                aws_access_key_id=AWS_ACCESS_KEY_ID,
                                aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                                is_secure=False)
    bucket = conn.get_bucket(s3_bucket)
    key = bucket.get_key(filename)
    key.get_contents_to_filename('./data/' + filename)
    print 'Downloading finished..'
def connect(self):
    time2connect = time.time()
    self.con = s3.connect_to_region(
        self.args.region,
        aws_access_key_id=self.args.key_id,
        aws_secret_access_key=self.args.secret,
        is_secure=True
    )
    self.bucket = self.con.get_bucket(self.args.bucket)
    self.time2connect = time.time() - time2connect
def __init__(self, region, key=None, secret=None, host=None):
    if region is None and host is not None:
        self.connection = connection.S3Connection(
            host=host, aws_access_key_id=key, aws_secret_access_key=secret)
    else:
        self.connection = connect_to_region(
            region, aws_access_key_id=key, aws_secret_access_key=secret)
def download(self, location, local_dir='.'): '''Download content from bucket/prefix/location. Location can be a directory or a file (e.g., my_dir or my_dir/my_image.tif) If location is a directory, all files in the directory are downloaded. If it is a file, then that file is downloaded. Args: location (str): S3 location within prefix. local_dir (str): Local directory where file(s) will be stored. Default is here. ''' self.logger.debug('Getting S3 info') bucket = self.info['bucket'] prefix = self.info['prefix'] access_key = self.info['S3_access_key'] secret_key = self.info['S3_secret_key'] session_token = self.info['S3_session_token'] self.logger.debug('Connecting to S3') s3conn = botos3.connect_to_region('us-east-1', aws_access_key_id=access_key, aws_secret_access_key=secret_key, security_token=session_token) b = s3conn.get_bucket(bucket, validate=False, headers={'x-amz-security-token': session_token}) # remove head and/or trail backslash from location location = location.strip('/') whats_in_here = b.list(prefix + '/' + location) self.logger.debug('Downloading contents') for key in whats_in_here: # skip directory keys if not key.name or key.name.endswith('/'): continue # get path to each file filepath = key.name.replace(prefix + '/' + location, '', 1).lstrip('/') filename = key.name.split('/')[-1] self.logger.debug(filename) file_dir = filepath.split('/')[:-1] file_dir = '/'.join(file_dir) full_dir = os.path.join(local_dir, file_dir) # make sure directory exists if not os.path.isdir(full_dir): os.makedirs(full_dir) # download file key.get_contents_to_filename(full_dir + '/' + filename) self.logger.debug('Done!')
def _get_s3_bucket(self):
    ''' Create an S3 connection to the bucket '''
    if self.REGION is None:
        conn = S3Connection(self.AWS_ACCESS_KEY, self.AWS_SECRET_KEY)
    else:
        # Bucket names with '.' need special treatment
        if '.' in self.BUCKET_NAME:
            conn = connect_to_region(
                self.REGION,
                aws_access_key_id=self.AWS_ACCESS_KEY,
                aws_secret_access_key=self.AWS_SECRET_KEY,
                calling_format=OrdinaryCallingFormat())
        else:
            conn = connect_to_region(
                self.REGION,
                aws_access_key_id=self.AWS_ACCESS_KEY,
                aws_secret_access_key=self.AWS_SECRET_KEY)
    return conn.get_bucket(self.BUCKET_NAME)
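# Why the dotted-bucket special case above matters: boto's default
# SubdomainCallingFormat builds URLs like https://my.dotted.bucket.s3.amazonaws.com,
# and the extra dots break HTTPS certificate validation against the
# *.s3.amazonaws.com wildcard certificate. OrdinaryCallingFormat keeps the bucket
# name in the path instead. A standalone sketch; bucket name and region are
# example values.
from boto.s3 import connect_to_region
from boto.s3.connection import OrdinaryCallingFormat

conn = connect_to_region('us-east-1', calling_format=OrdinaryCallingFormat())
bucket = conn.get_bucket('my.dotted.bucket', validate=False)  # path-style: https://s3.amazonaws.com/my.dotted.bucket/...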
def _create_connection(self):
    """ Create a new S3 Connection """
    aws_access_key, aws_secret_key = self.credentials_manager.get_credentials()
    return s3.connect_to_region(self._get_bucket_region(),
                                aws_access_key_id=aws_access_key.strip(),
                                aws_secret_access_key=aws_secret_key.strip(),
                                is_secure=self._s3_is_secure,
                                port=self._s3_port,
                                https_connection_factory=self._s3_https_connection_factory)
def conn(self):
    from boto.s3 import connect_to_region
    from boto.s3.connection import OrdinaryCallingFormat
    from baiji.config import settings
    if not self._connected:
        self._conn = connect_to_region(
            settings.region,
            aws_access_key_id=settings.key,
            aws_secret_access_key=settings.secret,
            calling_format=OrdinaryCallingFormat(),
            suppress_consec_slashes=False)
        self._connected = True
    return self._conn
def run(self):
    import boto.s3 as s3
    conn = s3.connect_to_region(self.region)
    bucket = s3.bucket.Bucket(connection=conn, name=self.bucket)
    filebase = self.distribution.get_name()
    for filepath in os.listdir('dist'):
        # listdir() returns bare names, so check the path relative to dist/
        if os.path.isdir(os.path.join('dist', filepath)):
            continue
        if not filepath.startswith(filebase):
            continue
        self.do_upload_file(bucket, 'dist/', filepath)
def upload_inputs(jar, raster_csv, institution_csv, bucket, prefix):
    logger.info("Uploading inputs to s3..")
    s3_conn = s3.connect_to_region('us-west-2')
    bucket = s3_conn.get_bucket(bucket)

    logger.info("Uploading " + raster_csv + " to " + prefix + '/raster_data.csv')
    raster_csv_key = bucket.new_key(prefix + '/raster_data.csv')
    raster_csv_key.set_contents_from_filename(raster_csv)

    logger.info("Uploading " + institution_csv + " to " + prefix + '/institution_data.csv')
    institution_csv_key = bucket.new_key(prefix + '/institution_data.csv')
    institution_csv_key.set_contents_from_filename(institution_csv)

    logger.info("Uploading " + jar + " to " + prefix + '/jar.jar')
    jar_key = bucket.new_key(prefix + '/jar.jar')
    jar_key.set_contents_from_filename(jar)
def create(create_tarball, push_to_s3): """Create Ibis test data""" print(str(ENV)) con = make_ibis_client() # verify some assumptions before proceeding if push_to_s3 and not create_tarball: raise IbisError( "Must specify --create-tarball if specifying --push-to-s3") if osp.exists(IBIS_TEST_DATA_LOCAL_DIR): raise IbisError( 'Local dir {0} already exists; please remove it first'.format( IBIS_TEST_DATA_LOCAL_DIR)) if not con.exists_database('tpch'): raise IbisError('`tpch` database does not exist') if not con.hdfs.exists('/test-warehouse/tpch.region_avro'): raise IbisError( 'HDFS dir /test-warehouse/tpch.region_avro does not exist') # generate tmp identifiers tmp_db_hdfs_path = pjoin(ENV.tmp_dir, guid()) tmp_db = guid() os.mkdir(IBIS_TEST_DATA_LOCAL_DIR) try: # create the tmp data locally con.create_database(tmp_db, path=tmp_db_hdfs_path) print('Created database {0} at {1}'.format(tmp_db, tmp_db_hdfs_path)) # create the local data set scrape_parquet_files(con) download_parquet_files(con, tmp_db_hdfs_path) download_avro_files(con) generate_csv_files() finally: con.drop_database(tmp_db, force=True) assert not con.hdfs.exists(TMP_DB_HDFS_PATH) if create_tarball: check_call('tar -xzf {0} {1}'.format(IBIS_TEST_DATA_TARBALL, IBIS_TEST_DATA_LOCAL_DIR), shell=True) if push_to_s3: from boto.s3 import connect_to_region s3_conn = connect_to_region('us-west-2') bucket = s3_conn.get_bucket(IBIS_TEST_DATA_S3_BUCKET) copy_tarball_to_versioned_backup(bucket) key = bucket.new_key(IBIS_TEST_DATA_TARBALL) print('Upload tarball to S3') key.set_contents_from_filename(IBIS_TEST_DATA_TARBALL, replace=False)
def upload_s3_unique(region, cfbucket, credentials, file_contents):
    logger.debug('Checking for file in s3')
    json_stack_filename = hashlib.sha1(file_contents).hexdigest()
    ess_three = s3.connect_to_region(region, **credentials)
    bucket = ess_three.get_bucket(cfbucket, validate=False)
    template_key = bucket.get_key(json_stack_filename)
    if template_key is None:
        logger.debug('Uploading file to s3')
        template_key = Key(bucket)
        template_key.key = json_stack_filename
        template_key.set_contents_from_string(file_contents)
    return template_key
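# Usage sketch for upload_s3_unique above: the key name is the SHA-1 of the
# template body, so re-uploading identical content is a no-op. The bucket name,
# file name, and credential values are assumptions for illustration.
credentials = {
    'aws_access_key_id': 'AKIA...',       # placeholder
    'aws_secret_access_key': '...',       # placeholder
}
with open('stack.template.json', 'rb') as f:
    key = upload_s3_unique('us-east-1', 'my-cfn-templates', credentials, f.read())
# Plain HTTPS URL (no query-string auth), e.g. for a CloudFormation TemplateURL.
template_url = key.generate_url(expires_in=0, query_auth=False)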
def backup(): """Backup Postgres to S3 using pg_dump""" key_name = S3_KEY_NAME if not key_name.endswith("/") and key_name != "": key_name = "%s/" % key_name # add the file name date suffix now = datetime.now() FILENAME_SUFFIX = "_%(year)d%(month)02d%(day)02d_%(hour)02d%(minute)02d%(second)02d" % { 'year': now.year, 'month': now.month, 'day': now.day, 'hour': now.hour, 'minute': now.minute, 'second': now.second } FILENAME = ARCHIVE_NAME + FILENAME_SUFFIX + ARCHIVE_SUFFIX log.info("Preparing " + FILENAME + " from the database dump ...") # create postgres databases dump with tempfile.NamedTemporaryFile() as t1: proc1 = subprocess.Popen(POSTGRES_DUMP_PATH, shell=True, universal_newlines=True, stdout=t1) proc1.wait() t1.flush() log.info("Uploading the " + FILENAME + " file to Amazon S3 ...") # get bucket conn = connect_to_region( AWS_REGION, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY ) try: bucket = conn.get_bucket(S3_BUCKET_NAME) except S3ResponseError: sys.stderr.write("There is no bucket with the name \"" + S3_BUCKET_NAME + "\" in your Amazon S3 account\n") sys.stderr.write("Error: Please enter an appropriate bucket name and re-run the script\n") return # upload file to Amazon S3 k = Key(bucket) k.key = key_name + FILENAME k.set_contents_from_filename(t1.name) log.info("Sucessfully uploaded the archive to Amazon S3")
def get_s3_list(daterange, prefixpath):
    prefix = prefixpath
    total_size = 0
    REGION = "cn-north-1"
    conn = s3.connect_to_region(REGION)
    bucket = conn.lookup('prodlog')
    ret = []
    if bucket:
        for k in bucket.list(prefix=prefix):
            if k.size <= 0:
                continue
            logsp = k.name.split('.')
            a = logsp[-4:-1]
            a.reverse()
            dt = ''.join(a)
            if dt <= daterange:
                total_size += k.size
                ret.append('s3://prodlog/' + k.name)
                print('s3://prodlog/' + k.name, ''.join(a))
    print('total:%d' % (total_size / 1024.0 / 1024.0 / 1024.0))
    return ret
def storage_service_bucket():
    """
    Returns an S3 bucket for video uploads.
    """
    conn = s3.connection.S3Connection(
        settings.AWS_ACCESS_KEY_ID,
        settings.AWS_SECRET_ACCESS_KEY
    )

    # We don't validate our bucket here: validation fires a HEAD request that is
    # equivalent to get_all_keys(), so it would need a very permissive IAM policy
    # (ListObjects on the whole bucket, not just the path used in each environment,
    # since we share a single bucket for multiple deployments in some configurations).
    bucket = conn.get_bucket(settings.VIDEO_UPLOAD_PIPELINE["BUCKET"], validate=False)

    # connect to the bucket's region, and get the bucket from there
    # https://github.com/boto/boto/issues/2207#issuecomment-60682869
    bucket_location = bucket.get_location()
    if bucket_location:
        conn = s3.connect_to_region(bucket_location)
        if conn:
            bucket = conn.get_bucket(settings.VIDEO_UPLOAD_PIPELINE["BUCKET"])
    return bucket
def delete(self, location):
    '''Delete content in bucket/prefix/location.
       Location can be a directory or a file (e.g., my_dir or my_dir/my_image.tif).
       If location is a directory, all files in the directory are deleted.
       If it is a file, then that file is deleted.

       Args:
           location (str): S3 location within prefix. Can be a directory or
               a file (e.g., my_dir or my_dir/my_image.tif).
    '''
    bucket = self.info['bucket']
    prefix = self.info['prefix']
    access_key = self.info['S3_access_key']
    secret_key = self.info['S3_secret_key']
    session_token = self.info['S3_session_token']

    self.logger.debug('Connecting to S3')
    s3conn = botos3.connect_to_region('us-east-1',
                                      aws_access_key_id=access_key,
                                      aws_secret_access_key=secret_key,
                                      security_token=session_token)
    b = s3conn.get_bucket(bucket, validate=False,
                          headers={'x-amz-security-token': session_token})

    # remove leading and/or trailing slash from location
    if location[0] == '/':
        location = location[1:]
    if location[-1] == '/':
        location = location[:-1]

    whats_in_here = b.list(prefix + '/' + location)

    self.logger.debug('Deleting contents')
    for key in whats_in_here:
        b.delete_key(key)

    self.logger.debug('Done!')
def get_connection(self): try: import boto except ImportError: raise RuntimeError("You must have boto installed to use s3") from boto.s3.connection import OrdinaryCallingFormat from boto.s3 import connect_to_region options = self.conn_options.copy() options['is_secure'] = asbool(options['is_secure']) if options['port']: options['port'] = int(options['port']) else: del options['port'] if not options['host']: del options['host'] if asbool(options.pop('use_path_style')): options['calling_format'] = OrdinaryCallingFormat() num_retries = int(options.pop('num_retries')) timeout = float(options.pop('timeout')) region = options.pop('region') if region: del options['host'] del options['port'] conn = connect_to_region(region, **options) else: conn = boto.connect_s3(**options) conn.num_retries = num_retries conn.http_connection_kwargs['timeout'] = timeout return conn
def main(argv=None):
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    file_dict = {}
    conn = s3.connect_to_region('us-west-2',
                                calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
    for count, key in enumerate(bucket.list()):
        file_dict[key.name] = key.size
        if count % 50000 == 0:
            print('{0} s3 files memorized'.format(count))

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    documents = nx.nxql('select * from Document')
    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                s3_size = file_dict.get(blob['digest'], None)
                if not s3_size:
                    print('{0} from {1} {2} not found in S3'.format(
                        blob['digest'], blob['path'], blob['xpath']))
                if file_dict.get(blob['digest'], 0) != int(blob['length']):
                    print(
                        '{0} from {1} {2} s3 size {3} does not match nuxeo size {4}'
                        .format(blob['digest'], blob['path'], blob['xpath'],
                                s3_size, blob['length']))
                if row % 25000 == 0:
                    print('{0} nuxeo blobs checked'.format(row))
                row = row + 1
def _connect_to_s3(region):
    global _s3_connection
    _s3_connection = s3.connect_to_region(region)
import logging import os from S3.Exceptions import S3ResponseError from os.path import join from boto.s3 import connect_to_region import boto.s3.connection import datetime # Mehmet ERSOY # https://github.com/MhmtErsy/AWS_S3_Connector_Python try: conn = connect_to_region( '<region>', aws_access_key_id='<access_key_id>', aws_secret_access_key='<secret_access_key>', host='<host>', #is_secure=True, # uncomment if you are not using ssl calling_format=boto.s3.connection.OrdinaryCallingFormat(), ) except Exception, e: logging.error('Failed: ' + str(e)) today = str(datetime.date.today()) today = today.replace(' ', '-') DOWNLOAD_LOCATION_PATH = 'S3_Downloads/' def DeleteDirectoryOrBucketFiles(bucket_name, directory_name=None): bucket = conn.get_bucket(bucket_name) if directory_name is not None:
def testSuccessWithHostOverrideRegion(self):
    connect_args = dict({'host': 's3.amazonaws.com'})
    connection = connect_to_region('us-west-2', **connect_args)
    self.assertEquals('s3.amazonaws.com', connection.host)
    self.assertIsInstance(connection, S3Connection)
def create_all(app, user=None, password=None, bucket_name=None, location=None, include_hidden=False): """ Uploads of the static assets associated with a Flask application to Amazon S3. All static assets are identified on the local filesystem, including any static assets associated with *registered* blueprints. In turn, each asset is uploaded to the bucket described by `bucket_name`. If the bucket does not exist then it is created. Flask-S3 creates the same relative static asset folder structure on S3 as can be found within your Flask application. Many of the optional arguments to `create_all` can be specified instead in your application's configuration using the Flask-S3 `configuration`_ variables. :param app: a :class:`flask.Flask` application object. :param user: an AWS Access Key ID. You can find this key in the Security Credentials section of your AWS account. :type user: `basestring` or None :param password: an AWS Secret Access Key. You can find this key in the Security Credentials section of your AWS account. :type password: `basestring` or None :param bucket_name: the name of the bucket you wish to server your static assets from. **Note**: while a valid character, it is recommended that you do not include periods in bucket_name if you wish to serve over HTTPS. See Amazon's `bucket restrictions`_ for more details. :type bucket_name: `basestring` or None :param location: the AWS region to host the bucket in; an empty string indicates the default region should be used, which is the US Standard region. Possible location values include: `'DEFAULT'`, `'EU'`, `'USWest'`, `'APSoutheast'` :type location: `basestring` or None :param include_hidden: by default Flask-S3 will not upload hidden files. Set this to true to force the upload of hidden files. :type include_hidden: `bool` .. _bucket restrictions: http://docs.amazonwebservices.com/AmazonS3\ /latest/dev/BucketRestrictions.html """ user = user or app.config.get('AWS_ACCESS_KEY_ID') password = password or app.config.get('AWS_SECRET_ACCESS_KEY') bucket_name = bucket_name or app.config.get('S3_BUCKET_NAME') if not bucket_name: raise ValueError("No bucket name provided.") location = location or app.config.get('S3_REGION') logger.info(location) # build list of static files all_files = _gather_files(app, include_hidden) logger.debug("All valid files: %s" % all_files) # connect to s3 if not location: conn = S3Connection(user, password) # (default region) else: conn = connect_to_region(location, aws_access_key_id=user, aws_secret_access_key=password) # get_or_create bucket try: try: bucket = conn.get_bucket(bucket_name) except S3CreateError as e: if e.error_code == u'BucketAlreadyOwnedByYou': bucket = conn.get_bucket(bucket_name) else: raise e bucket.make_public(recursive=False) except S3CreateError as e: raise e if app.config['S3_ONLY_MODIFIED']: try: hashes = json.loads( Key(bucket=bucket, name=".file-hashes").get_contents_as_string()) except S3ResponseError as e: logger.warn("No file hashes found: %s" % e) hashes = None new_hashes = _upload_files(app, all_files, bucket, hashes=hashes) try: k = Key(bucket=bucket, name=".file-hashes") k.set_contents_from_string(json.dumps(dict(new_hashes))) except S3ResponseError as e: logger.warn("Unable to upload file hashes: %s" % e) else: _upload_files(app, all_files, bucket)
gbdx = Interface() #Extract values from customer S3 information sinfo = gbdx.s3.info region = 'us-east-1' access_key = sinfo['S3_access_key'] secret_key = sinfo['S3_secret_key'] session_token = sinfo['S3_session_token'] gbdx_bucket = sinfo['bucket'] headers = {'x-amz-security-token': session_token} gbdx_prefix = sinfo['prefix'] #Connect to s3 with credentials conn = s3.connect_to_region(region_name=region, aws_access_key_id=access_key, aws_secret_access_key=secret_key, security_token=session_token) #Get gbd-customer-data bucket bucket = conn.get_bucket(bucket_name=gbdx_bucket, headers=headers, validate=False) #This example will print the entire contents #of the customer's prefix, modify as needed. #See http://docs.aws.amazon.com/AmazonS3/latest/dev/ListingKeysHierarchy.html #get_all_keys() method could also be used similarly keys = bucket.list(prefix=gbdx_prefix) for key in keys:
def main(argv=None): parser = argparse.ArgumentParser( description='extent stats via Nuxeo REST API') parser.add_argument('path', nargs=1, help="root path") parser.add_argument( 'outdir', nargs=1, ) parser.add_argument('--no-s3-check', dest='s3_check', action='store_false') utils.get_common_options(parser) if argv is None: argv = parser.parse_args() # look up all the files in S3, so we can double check that all # the files exist as we loop through Nuxeo file_check = None s3_bytes = s3_count = 0 if argv.s3_check: from boto import s3 from boto.s3.connection import OrdinaryCallingFormat file_check = {} conn = s3.connect_to_region('us-west-2', calling_format=OrdinaryCallingFormat()) bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon') for count, key in enumerate(bucket.list()): file_check[key.name] = key.size if count % 50000 == 0: print('{0} s3 files memorized'.format(count), file=sys.stderr) s3_bytes = s3_bytes + key.size s3_count = len(file_check) nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper()) campuses = nx.children(argv.path[0]) summary_workbook = xlsxwriter.Workbook( os.path.join(argv.outdir[0], '{}-summary.xlsx'.format(today))) # cell formats header_format = summary_workbook.add_format({ 'bold': True, }) number_format = summary_workbook.add_format() number_format.set_num_format('#,##0') summary_worksheet = summary_workbook.add_worksheet('summary') # headers summary_worksheet.write(0, 1, 'deduplicated files', header_format) summary_worksheet.write(0, 2, 'deduplicated bytes', header_format) summary_worksheet.write(0, 4, 'total files', header_format) summary_worksheet.write(0, 5, 'total bytes', header_format) if argv.s3_check: summary_worksheet.write(0, 7, 'files on S3', header_format) summary_worksheet.write(0, 8, 'bytes on S3', header_format) # widths summary_worksheet.set_column( 0, 1, 10, ) summary_worksheet.set_column( 2, 2, 25, ) summary_worksheet.set_column( 3, 4, 10, ) summary_worksheet.set_column( 5, 5, 25, ) summary_worksheet.set_column( 6, 7, 10, ) summary_worksheet.set_column( 8, 8, 25, ) summary_worksheet.set_column( 9, 9, 10, ) true_count = dedup_total = total_count = running_total = 0 row = 1 for campus in campuses: basename = os.path.basename(campus['path']) documents = nx.nxql( 'select * from Document where ecm:path startswith"{0}"'.format( campus['path'])) (this_count, this_total, dedup_count, dedup_bytes) = forCampus(documents, basename, file_check, argv.outdir[0]) summary_worksheet.write(row, 0, basename) summary_worksheet.write(row, 1, dedup_count, number_format) summary_worksheet.write(row, 2, dedup_bytes, number_format) summary_worksheet.write(row, 3, sizeof_fmt(dedup_bytes)) summary_worksheet.write(row, 4, this_count, number_format) summary_worksheet.write(row, 5, this_total, number_format) summary_worksheet.write(row, 6, sizeof_fmt(this_total)) total_count = total_count + this_count # number of files running_total = running_total + this_total # number of bytes true_count = true_count + dedup_count dedup_total = dedup_total + dedup_bytes # number of bytes row = row + 1 summary_worksheet.write(row, 0, '{}'.format(today)) summary_worksheet.write(row, 1, true_count, number_format) summary_worksheet.write(row, 2, dedup_total, number_format) summary_worksheet.write(row, 3, sizeof_fmt(dedup_total)) summary_worksheet.write(row, 4, total_count, number_format) summary_worksheet.write(row, 5, running_total, number_format) summary_worksheet.write(row, 6, sizeof_fmt(running_total)) if argv.s3_check: summary_worksheet.write(row, 7, s3_count, 
number_format) summary_worksheet.write(row, 8, s3_bytes, number_format) summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes)) summary_workbook.close()
def run(self, conn, tmp, module_name, module_args, inject, complex_args=None, **kwargs): try: args = {} if complex_args: args.update(complex_args) args.update(parse_kv(module_args)) role_name = args["role_name"] account_number = args["account_number"] region = args["region"] logging_bucket = args["log_bucket"] envdict = {} if self.runner.environment: env = template.template(self.runner.basedir, self.runner.environment, inject, convert_bare=True) env = utils.safe_eval(env) bucketName = "config-bucket-%s-%s" % (account_number, region) snsName = "config-topic-%s-%s" % (account_number, region) s3_conn = s3.connect_to_region( region, aws_access_key_id=env.get("AWS_ACCESS_KEY_ID"), aws_secret_access_key=env.get("AWS_SECRET_ACCESS_KEY"), security_token=env.get("AWS_SECURITY_TOKEN")) try: bucket = s3_conn.get_bucket(bucketName) except Exception, e: if (region == "us-east-1"): bucket1 = s3_conn.create_bucket(bucketName) bucket2 = s3_conn.get_bucket(logging_bucket) response = bucket1.enable_logging(bucket2, "ConfigBucket/") else: bucket1 = s3_conn.create_bucket(bucketName, location=region) bucket2 = s3_conn.get_bucket(logging_bucket) response = bucket1.enable_logging(bucket2, "ConfigBucket/") sns_conn = sns.connect_to_region( region, aws_access_key_id=env.get("AWS_ACCESS_KEY_ID"), aws_secret_access_key=env.get("AWS_SECRET_ACCESS_KEY"), security_token=env.get("AWS_SECURITY_TOKEN")) sns_conn.create_topic(snsName) snsARN = "arn:aws:sns:%s:%s:%s" % (region, account_number, snsName) connection = configservice.connect_to_region( region, aws_access_key_id=env.get("AWS_ACCESS_KEY_ID"), aws_secret_access_key=env.get("AWS_SECRET_ACCESS_KEY"), security_token=env.get("AWS_SECURITY_TOKEN")) response = connection.describe_configuration_recorders() if len(response["ConfigurationRecorders"]) is 0: recorder_name = "config-recorder-%s" % account_number else: for item in response["ConfigurationRecorders"]: recorder_name = item["name"] response = connection.describe_delivery_channels() if len(response["DeliveryChannels"]) is 0: channel_name = "config-channel-%s" % account_number else: for item in response["DeliveryChannels"]: channel_name = item["name"] ConfigurationRecorder = { 'name': recorder_name, 'roleARN': "arn:aws:iam::%s:role/%s" % (account_number, role_name) } ConfigurationChannel = { 'name': channel_name, 's3BucketName': bucketName, 'snsTopicARN': snsARN } response = connection.put_configuration_recorder( ConfigurationRecorder) response = connection.put_delivery_channel(ConfigurationChannel) response = connection.start_configuration_recorder(recorder_name) return ReturnData(conn=conn, comm_ok=True, result=dict(failed=False, changed=False, msg="Config Service Created"))
def _open(self):
    if self._conn is None:
        self._conn = connect_to_region(
            self.region, calling_format=OrdinaryCallingFormat())
        self._bucket = self._conn.get_bucket(self.bucket, validate=False)
def main(config_file, region, log_level, log_file, concurrency, selection_string, accumulation_string, reduction_string, command, s3_uri): '''Perform simple listing, collating, or deleting of many S3 objects at the same time. Examples: \b List empty objects: s3workers list --select 'size == 0' s3://mybucket/myprefix \b Report total of all non-empty objects: s3workers list --select 'size > 0' --reduce 'accumulator += size' s3://mybucket/myprefix \b Total size group by MD5: s3workers list --accumulator '{}' --reduce 'v=accumulator.get(md5,0)+size; accumulator[md5]=v' s3://mybucket/myprefix ''' # noqa: E501 config = ConfigStruct(config_file, 'options', options=DEFAULTS) opts = config.options # let command line options have temporary precedence if provided values opts.might_prefer(region=region, log_level=log_level, log_file=log_file, concurrency=concurrency) config.configure_basic_logging(__name__) logger = logging.getLogger(__name__) s3_uri = re.sub(r'^(s3:)?/+', '', s3_uri) items = s3_uri.split('/', 1) bucket_name = items[0] prefix = items[1] if len(items) > 1 else '' conn = s3.connect_to_region(opts.region) if opts.region else connect_s3() bucket = conn.get_bucket(bucket_name) progress = S3KeyProgress() reducer = None if reduction_string: reducer = Reducer(reduction_string, accumulation_string) def key_dumper(key): accumulator = reducer.reduce(key.name, key.size, key.md5, key.last_modified) progress.write('%s %10d %s %s => %s', key.last_modified, key.size, key.md5, key.name, accumulator) else: def key_dumper(key): progress.write('%s %10d %s %s', key.last_modified, key.size, key.md5, key.name) def key_deleter(key): progress.write('DELETING: %s %10d %s %s', key.last_modified, key.size, key.md5, key.name) key.delete() selector = compile(selection_string, '<select>', 'eval') if selection_string else None handler = key_deleter if command == 'delete' else key_dumper manager = Manager(opts.concurrency) manager.start_workers() logger.info('Preparing %d jobs for %d workers', len(SHARDS), manager.worker_count) # break up jobs into single char prefix jobs for shard in SHARDS: manager.add_work(S3ListJob(bucket, prefix + shard, selector, handler, progress.report)) manager.wait_for_workers() progress.finish() if reducer: click.echo('Final accumulator value: ' + str(reducer.accumulator))
def testDefaultWithInvalidHostNone(self):
    connect_args = dict({'host': None})
    connection = connect_to_region('us-east-1', **connect_args)
    self.assertEquals('s3.amazonaws.com', connection.host)
    self.assertIsInstance(connection, S3Connection)
def testSuccessWithDefaultEUCentral1(self):
    connection = connect_to_region('eu-central-1')
    self.assertEquals('s3.eu-central-1.amazonaws.com', connection.host)
    self.assertIsInstance(connection, S3Connection)
def testSuccessWithDefaultUSEast1(self):
    connection = connect_to_region('us-east-1')
    self.assertEquals('s3.amazonaws.com', connection.host)
    self.assertIsInstance(connection, S3Connection)
def __init__(self, region, **kwargs):
    self._region = region
    self._options = kwargs
    self._connection = s3.connect_to_region(region)
# Logging setup log = logging.getLogger(__name__) root = logging.getLogger() root.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) ch.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s: %(levelname)s %(message)s') ch.setFormatter(formatter) root.addHandler(ch) logging.getLogger('boto').setLevel(logging.INFO) # EOF logging setup. Pfew. conn = s3.connect_to_region( S3_REGION, aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'], aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'], ) i = S3ImageResizer(conn) i.fetch( 'http://s3-eu-west-1.amazonaws.com/pnt-item-pictures-incoming/item_58412fc768022a5112b0b110_000002/0.jpg' ) url = i.store(in_bucket=BUCKET_NAME, key_name='raw.jpg') log.info("Got url %s" % url) # Should be 'https://pnt-tests.s3-eu-west-1.amazonaws.com/raw.jpg' want = 'https://%s.s3-%s.amazonaws.com:443/%s' % (BUCKET_NAME, S3_REGION, 'raw.jpg')
def _synch(settings): cache = File(settings.local_cache) if not cache.exists: cache.create() settings.destination.directory = settings.destination.directory.trim("/") for repo in listwrap(coalesce(settings.repo, settings.repos)): Log.alert("Synch {{repo}}", repo=repo.description) if not strings.between(repo.source.url, "/", ".git"): Log.error("This is not a git reference: {{git_url}}", git_url=repo.source.url) name = coalesce(repo.source.name, strings.between(repo.source.url, "/", ".git")) if not repo.source.branch: Log.note("{{name}} has not branch property", name=name) # DO WE HAVE A LOCAL COPY? local_repo = File.new_instance(cache, name) local_dir = File.new_instance(local_repo, repo.source.directory) if not local_repo.exists: Process("clone repo", ["git", "clone", repo.source.url, name], cwd=cache, shell=True, debug=DEBUG).join(raise_on_error=True) # SWITCH TO BRANCH Process("checkout", ["git", "checkout", repo.source.branch], cwd=local_repo, shell=True, debug=DEBUG).join(raise_on_error=True) # UPDATE THE LOCAL COPY Process("update", ["git", "pull", "origin", repo.source.branch], cwd=local_repo, shell=True, debug=DEBUG).join(raise_on_error=True) # GET CURRENT LISTING OUT OF S3 try: connection = connect_to_region( region_name=repo.destination.region, calling_format="boto.s3.connection.OrdinaryCallingFormat", aws_access_key_id=unwrap(repo.destination.aws_access_key_id), aws_secret_access_key=unwrap( repo.destination.aws_secret_access_key)) bucket = connection.get_bucket(repo.destination.bucket) except Exception as e: Log.error("Problem connecting to {{bucket}}", bucket=repo.destination.bucket, cause=e) remote_prefix = repo.destination.directory.strip('/') + "/" listing = bucket.list(prefix=remote_prefix) metas = { m.key[len(remote_prefix):]: Data(key=m.key, etag=m.etag) for m in listing } net_new = [] Log.note("Look for differences") for local_file in local_dir.leaves: local_rel_file = local_file.abspath[len(local_dir.abspath ):].lstrip(b'/') if "/." in local_rel_file or local_rel_file.startswith("."): continue local_rel_file = local_rel_file.replace("qb/Qb", "qb/qb") remote_file = metas.get(local_rel_file) if not repo.force and remote_file: if remote_file.etag != md5(local_file): net_new.append(local_file) else: net_new.append(local_file) # SEND DIFFERENCES for n in net_new: remote_file = join_path(repo.destination.directory, n.abspath[len(local_dir.abspath):]) remote_file = remote_file.replace("qb/Qb", "qb/qb") try: Log.note("upload {{file}} ({{type}})", file=remote_file, type=n.mime_type) storage = bucket.new_key(remote_file) storage.content_type = n.mime_type storage.set_contents_from_string(n.read_bytes()) storage.set_acl('public-read') except Exception as e: Log.warning("can not upload {{file}} ({{type}})", file=remote_file, type=n.mime_type, cause=e)
def download_auth(bucket_name, key_name, region):
    conn = connect_to_region(
        region,
        calling_format='boto.s3.connection.OrdinaryCallingFormat')
    bucket = conn.get_bucket(bucket_name, validate=False)
    key = Key(bucket=bucket, name=key_name)
    print key.get_contents_as_string()
def publish_config_command(args): master_config = vars(args)["master-config"] master_config_text = fetch(master_config) tiers_config = json.loads(master_config_text) conn = connect_to_region(tiers_config["region"], calling_format=OrdinaryCallingFormat()) bucket_name = "{}.{}".format(tiers_config["bucket"], tiers_config["domain"]) bucket = conn.lookup(bucket_name) if not bucket: print "In region {}, creating S3 bucket {}".format( tiers_config["region"], bucket_name) bucket = conn.create_bucket(bucket_name, location=tiers_config["region"]) args.dirty = False def upload_if_changed(source, key_name, topic): global dirty if key_name.lower().endswith(".json"): try: json.loads(source) # Verify json syntax except ValueError as e: print "Json file is not json enough: ", key_name, e sys.exit(1) print " {}".format(key_name), key = bucket.get_key(key_name) if key: dest = key.get_contents_as_string() if source == dest: print " - No changes detected in {}.".format(topic) key = None else: key = Key(bucket) mimetype, encoding = mimetypes.guess_type(key_name) if mimetype: key.set_metadata('Content-Type', mimetype) key.key = key_name if key: key.set_contents_from_string(source) print " uploaded to s3://{}/{}".format(bucket_name, key_name) args.dirty = True # Upload master config upload_if_changed(master_config_text, TIERS_CONFIG_FILENAME, "master config") # Upload all tiers LEGACY STUFF FOR API ROUTER for filename in glob("tiers/*.json"): with open(filename, "r") as f: upload_if_changed(f.read(), filename, "tier config") # Upload all tier for filename in glob("tiers/*/*.json"): with open(filename, "r") as f: upload_if_changed(f.read(), filename, "tier config") # Upload all SSH keys for filename in glob("ssh-keys/*.pem"): with open(filename, "r") as f: upload_if_changed(f.read(), filename, "ssh key") if args.dirty: print "Changes detected! Please update your local environment " \ "with 'tier use [tier-name]' command." print "Publish command ran successfully."
#!/usr/bin/env python
import os, sys
from boto import s3 as s3

conn = s3.connect_to_region("us-east-1")

def list_buckets():
    buckets = conn.get_all_buckets()
    print("List of buckets:")
    for b in buckets:
        try:
            name = b.name
        except AttributeError:
            name = "NoName"
        print("\t" + name)

def list_files():
    bucketname = raw_input("Which bucket's files do you want? ")
    count = 0
    if (conn.lookup(bucketname)):
        files = [k for k in conn.get_bucket(bucketname)]
        print "Files: "
        for k in files:
            count += 1
            print k.name.encode('utf-8')
        print("\nTotal number of files: " + str(count))
        choice = raw_input("Would you like to download these files? ")
        if (choice == "y"):
            choice = raw_input("And delete the files after downloading them? ")
def connect_s3():
    # we'll need to deal with this assumption
    return s3.connect_to_region('us-east-1')  # Location.USWest2)
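# One possible way to remove the hard-coded region flagged above; a sketch only,
# assuming an AWS_REGION environment variable is an acceptable override in this
# codebase and reusing the snippet's existing `s3` import.
import os

def connect_s3_configurable():
    region = os.environ.get('AWS_REGION', 'us-east-1')  # fall back to the current default
    return s3.connect_to_region(region)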
def deploy_tarball_to_s3(tarball_obj, bucket_name, prefix='', region='us-west-2', concurrency=50, no_compress=False, strip_components=0): """ Upload the contents of `tarball_obj`, a File-like object representing a valid .tar.gz file, to the S3 bucket `bucket_name` """ # Connect to S3 and get a reference to the bucket name we will push files to conn = connect_to_region(region) if conn is None: logging.error("Invalid AWS region %s" % region) return try: bucket = conn.get_bucket(bucket_name, validate=True) except boto.exception.S3ResponseError: logging.error("S3 bucket %s does not exist in region %s" % (bucket_name, region)) return # Open the tarball try: with tarfile.open(name=None, mode="r:*", fileobj=tarball_obj) as tarball: files_uploaded = 0 # Parallelize the uploads so they don't take ages pool = Pool(concurrency) # Iterate over the tarball's contents. try: for member in tarball: # Ignore directories, links, devices, fifos, etc. if not member.isfile(): continue # Mimic the behaviour of tar -x --strip-components= stripped_name = member.name.split('/')[strip_components:] if not bool(stripped_name): continue path = os.path.join(prefix, '/'.join(stripped_name)) # Read file data from the tarball fd = tarball.extractfile(member) # Send a job to the pool. pool.wait_available() pool.apply_async(__deploy_asset_to_s3, (fd.read(), path, member.size, bucket, not no_compress)) files_uploaded += 1 # Wait for all transfers to finish pool.join() except KeyboardInterrupt: # Ctrl-C pressed print("Cancelling upload...") pool.join() finally: print("Uploaded %i files" % (files_uploaded)) except tarfile.ReadError: print("Unable to read asset tarfile", file=sys.stderr) return
def __init__(self, settings):
    self.conn = s3.connect_to_region(
        'us-east-1',
        aws_access_key_id=settings['key'],
        aws_secret_access_key=settings['secret'])
    self.bucket = self.conn.get_bucket(settings['bucket'])
def main(argv=None): parser = argparse.ArgumentParser( description="extent stats via Nuxeo REST API") parser.add_argument( "outdir", nargs=1, ) parser.add_argument("--no-s3-check", dest="s3_check", action="store_false") utils.get_common_options(parser) if argv is None: argv = parser.parse_args() os.makedirs(argv.outdir[0], exist_ok=True) # look up all the files in S3, so we can double check that all # the files exist as we loop through Nuxeo file_check = None s3_bytes = s3_count = 0 if argv.s3_check: from boto import s3 from boto.s3.connection import OrdinaryCallingFormat file_check = {} conn = s3.connect_to_region("us-west-2", calling_format=OrdinaryCallingFormat()) bucket = conn.get_bucket("data.nuxeo.cdlib.org.oregon") for count, key in enumerate(bucket.list()): file_check[key.name] = key.size if count % 50000 == 0: print("{0} s3 files memorized".format(count), file=sys.stderr) s3_bytes = s3_bytes + key.size s3_count = len(file_check) nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper()) campuses = [ "UCB", "UCD", "UCI", "UCLA", "UCM", "UCOP", "UCR", "UCSB", "UCSC", "UCSD", "UCSF", ] summary_workbook = xlsxwriter.Workbook( os.path.join(argv.outdir[0], "{}-summary.xlsx".format(today))) # cell formats header_format = summary_workbook.add_format({ "bold": True, }) number_format = summary_workbook.add_format() number_format.set_num_format("#,##0") summary_worksheet = summary_workbook.add_worksheet("summary") # headers summary_worksheet.write(0, 1, "deduplicated files", header_format) summary_worksheet.write(0, 2, "deduplicated bytes", header_format) summary_worksheet.write(0, 4, "total files", header_format) summary_worksheet.write(0, 5, "total bytes", header_format) if argv.s3_check: summary_worksheet.write(0, 7, "files on S3", header_format) summary_worksheet.write(0, 8, "bytes on S3", header_format) # widths summary_worksheet.set_column( 0, 1, 10, ) summary_worksheet.set_column( 2, 2, 25, ) summary_worksheet.set_column( 3, 4, 10, ) summary_worksheet.set_column( 5, 5, 25, ) summary_worksheet.set_column( 6, 7, 10, ) summary_worksheet.set_column( 8, 8, 25, ) summary_worksheet.set_column( 9, 9, 10, ) true_count = dedup_total = total_count = running_total = 0 row = 1 for campus in campuses: (this_count, this_total, dedup_count, dedup_bytes) = forCampus(campus, file_check, argv.outdir[0], nx) # write out this row in the sheet summary_worksheet.write(row, 0, campus) summary_worksheet.write(row, 1, dedup_count, number_format) summary_worksheet.write(row, 2, dedup_bytes, number_format) summary_worksheet.write(row, 3, sizeof_fmt(dedup_bytes)) summary_worksheet.write(row, 4, this_count, number_format) summary_worksheet.write(row, 5, this_total, number_format) summary_worksheet.write(row, 6, sizeof_fmt(this_total)) # keep track of running totals total_count = total_count + this_count # number of files running_total = running_total + this_total # number of bytes true_count = true_count + dedup_count dedup_total = dedup_total + dedup_bytes # number of bytes row = row + 1 # write totals in the summary worksheet summary_worksheet.write(row, 0, "{}".format(today)) summary_worksheet.write(row, 1, true_count, number_format) summary_worksheet.write(row, 2, dedup_total, number_format) summary_worksheet.write(row, 3, sizeof_fmt(dedup_total)) summary_worksheet.write(row, 4, total_count, number_format) summary_worksheet.write(row, 5, running_total, number_format) summary_worksheet.write(row, 6, sizeof_fmt(running_total)) if argv.s3_check: summary_worksheet.write(row, 7, s3_count, number_format) 
summary_worksheet.write(row, 8, s3_bytes, number_format) summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes)) summary_workbook.close()
# usage: remove_public.py bucketName folderName
import sys
import boto
from boto import connect_s3
from boto.s3 import connect_to_region
from boto.s3.connection import S3Connection, OrdinaryCallingFormat

bucketname = sys.argv[1]
dirname = sys.argv[2]

s3 = connect_to_region('ap-southeast-2',
                       aws_access_key_id='your_key_here',
                       aws_secret_access_key='your_secret_here',
                       is_secure=True,
                       calling_format=OrdinaryCallingFormat())

bucket = s3.get_bucket(bucketname)
keys = bucket.list()
for k in keys:
    new_grants = []
    acl = k.get_acl()
    for g in acl.acl.grants:
        if g.uri != "http://acs.amazonaws.com/groups/global/AllUsers":
            new_grants.append(g)
    acl.acl.grants = new_grants
    k.set_acl(acl)
def testWithNonAWSHost(self):
    connect_args = dict({'host': 'www.not-a-website.com'})
    connection = connect_to_region('us-east-1', **connect_args)
    self.assertEquals('www.not-a-website.com', connection.host)
    self.assertIsInstance(connection, S3Connection)