def fetch_s3_object(self, bucket, key):
    if not self.no_cache:
        m = hashlib.md5()
        m.update(bucket + key)
        cache_key = m.hexdigest()
        if cache_key in self.cache:
            self.logger.debug('cache hit for %s' % cache_key)
            obj = self.cache[cache_key]
            print obj
            return obj
        else:
            self.logger.debug('cache miss for %s' % cache_key)
            conn = boto.connect_s3()
            b = conn.get_bucket(bucket)
            k = b.get_key(key)
            if k:
                obj = (k.get_contents_as_string(), self.build_s3_meta(k))
                self.cache[cache_key] = obj
                return obj
            else:
                return None, None
    else:
        conn = boto.connect_s3()
        k = conn.get_bucket(bucket).get_key(key)
        if k:
            meta = self.build_s3_meta(k)
            return k.get_contents_as_string(), meta
        else:
            return None, None
def uploadThumbs(src, bucket_name, aws_access_id, aws_access_secret):
    updated_keys = 0
    # connect to the bucket
    conn = boto.connect_s3(aws_access_id, aws_access_secret)
    bucket = conn.get_bucket(bucket_name)
    for dirname, dirnames, filenames in os.walk(src):
        for filename in filenames:
            name, ext = filename.split('.')
            if ext == 'jpg':
                if updated_keys >= 100:
                    # Close and reopen the connection, re-fetch the bucket
                    # and reset the counter
                    conn.close()
                    conn = boto.connect_s3(aws_access_id, aws_access_secret)
                    bucket = conn.get_bucket(bucket_name)
                    updated_keys = 0
                key_dir = dirname.replace(os.path.abspath(src), '')
                sys.stdout.write("saving: " + key_dir + "/" + filename)
                k = Key(bucket)
                k.key = key_dir + "/" + filename
                k.set_contents_from_filename(os.path.join(dirname, filename), cb=done_cb)
                k.set_acl('public-read')
                sys.stdout.write("\n")
                sys.stdout.flush()
                updated_keys = updated_keys + 1
def main():
    parser = argparse.ArgumentParser('Check the multipart upload status')
    parser.add_argument('-c', '--cancel', action="store_true", help='cancel all the outstanding ')
    parser.add_argument('-f', '--force', action="store_true", help='force all the outstanding ')
    args = vars(parser.parse_args())

    if exists(join(expanduser('~'), '.aws/credentials')):
        # This relies on a ~/.aws/credentials file holding the '<aws access key>', '<aws secret key>'
        LOG.info("Using ~/.aws/credentials")
        s3_connection = boto.connect_s3(profile_name='chiles')
    else:
        # This relies on a ~/.boto or /etc/boto.cfg file holding the '<aws access key>', '<aws secret key>'
        LOG.info("Using ~/.boto or /etc/boto.cfg")
        s3_connection = boto.connect_s3()

    bucket = s3_connection.get_bucket(CHILES_BUCKET_NAME)
    one_day_ago = datetime.datetime.now() - datetime.timedelta(hours=24)

    for item in bucket.list_multipart_uploads():
        LOG.info('key_name: {0}, initiated: {1}'.format(item.key_name, item.initiated))
        date_initiated = datetime.datetime.strptime(item.initiated, '%Y-%m-%dT%H:%M:%S.%fZ')
        if (date_initiated < one_day_ago and args['cancel']) or args['force']:
            LOG.info('Cancelling {0}'.format(item.key_name))
            bucket.cancel_multipart_upload(item.key_name, item.id)
def main():
    logging.basicConfig(level=logging.INFO)
    args = parser.parse_args()
    log.debug("Got args: %s" % args)

    # Check that src is a valid S3 url
    split_rs = urlparse.urlsplit(args.src)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % args.src)

    # Check that dest does not exist
    if os.path.exists(args.dest):
        if args.force:
            os.remove(args.dest)
        else:
            raise ValueError("Destination file '%s' exists, specify -f to"
                             " overwrite" % args.dest)

    # Split out the bucket and the key
    s3 = boto.connect_s3()
    bucket = s3.lookup(split_rs.netloc)
    key = bucket.get_key(split_rs.path)

    # Determine the total size and calculate byte ranges
    conn = boto.connect_s3()
    resp = conn.make_request("HEAD", bucket=bucket, key=key)
    size = int(resp.getheader("content-length"))
    logging.info("Got headers: %s" % resp.getheaders())

    # Skipping multipart if file is less than 1mb
    if size < 1024 * 1024:
        t1 = time.time()
        key.get_contents_to_filename(args.dest)
        t2 = time.time() - t1
        log.info("Finished single-part download of %0.2fM in %0.2fs (%0.2fMbps)" %
                 (size, t2, size / t2))
    else:
        # Touch the file
        fd = os.open(args.dest, os.O_CREAT)
        os.close(fd)

        num_parts = args.num_processes

        def arg_iterator(num_parts):
            for min_byte, max_byte in gen_byte_ranges(size, num_parts):
                yield (bucket.name, key.name, args.dest, min_byte, max_byte)

        s = size / 1024 / 1024.
        try:
            t1 = time.time()
            pool = Pool(processes=args.num_processes)
            pool.map_async(do_part_download, arg_iterator(num_parts)).get(9999999)
            t2 = time.time() - t1
            log.info("Finished downloading %0.2fM in %0.2fs (%0.2fMbps)" %
                     (s, t2, s / t2))
        except KeyboardInterrupt:
            log.info("User terminated")
        except Exception, err:
            log.error(err)
def create_connection(settings):
    """ Creates an S3 connection using credentials if skipauth is false """
    if settings.get('skipauth'):
        conn = boto.connect_s3()
    else:
        conn = boto.connect_s3(settings['key'], settings['secret'])
    return conn
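# A minimal usage sketch of create_connection (hypothetical settings values;
# 'skipauth', 'key' and 'secret' are the keys the function reads, the bucket
# name below is an assumption for illustration only):
settings = {'skipauth': False, 'key': 'AKIA...', 'secret': '...'}
conn = create_connection(settings)
bucket = conn.get_bucket('example-bucket')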
def download_s3_bucket(bucket_name, target, credentials=None):
    if credentials:
        conn = boto.connect_s3(*credentials)
    else:
        conn = boto.connect_s3()
    b = conn.get_bucket(bucket_name)

    L = list(b.list())
    # sort by key name, then reverse so '<dir>_$folder$' markers are handled
    # before the keys inside those directories
    L.sort(key=lambda x: x.name)
    L = L[::-1]

    f_suffix = '_$folder$'
    for l in L:
        n = l.name
        if n.endswith(f_suffix):
            dirname = n[:-len(f_suffix)]
            pathname = os.path.join(target, dirname)
            if not os.path.exists(pathname):
                print(n)
                os.mkdir(pathname)
        else:
            pathname = os.path.join(target, n)
            dirn = os.path.split(pathname)[0]
            if dirn and not os.path.isdir(dirn):
                print('making directory %s' % dirn)
                os.makedirs(dirn)
            if not os.path.exists(pathname):
                print(n)
                l.get_contents_to_filename(pathname)
def get_s3_connection(aws_connect_kwargs, location, rgw, s3_url):
    if s3_url and rgw:
        rgw = urlparse(s3_url)
        s3 = boto.connect_s3(
            is_secure=rgw.scheme == 'https',
            host=rgw.hostname,
            port=rgw.port,
            calling_format=OrdinaryCallingFormat(),
            **aws_connect_kwargs
        )
    elif is_fakes3(s3_url):
        fakes3 = urlparse(s3_url)
        s3 = S3Connection(
            is_secure=fakes3.scheme == 'fakes3s',
            host=fakes3.hostname,
            port=fakes3.port,
            calling_format=OrdinaryCallingFormat(),
            **aws_connect_kwargs
        )
    elif is_walrus(s3_url):
        walrus = urlparse(s3_url).hostname
        s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
    else:
        aws_connect_kwargs['is_secure'] = True
        try:
            s3 = connect_to_aws(boto.s3, location, **aws_connect_kwargs)
        except AnsibleAWSError:
            # use this as fallback because connect_to_region seems to fail in
            # boto + non 'classic' aws accounts in some cases
            s3 = boto.connect_s3(**aws_connect_kwargs)
    return s3
def download_s3(self, fromPath, toPath):
    """download from S3 to local folder

    Args:
        fromPath (str): S3 URL
        toPath (str): local folder
    """
    if fromPath.startswith("s3n://"):
        noSchemePath = fromPath[6:]
    elif fromPath.startswith("s3://"):
        noSchemePath = fromPath[5:]
    parts = noSchemePath.split('/')
    bucket = parts[0]
    s3path = noSchemePath[len(bucket) + 1:]
    if self.key:
        self.conn = boto.connect_s3(self.key, self.secret)
    else:
        self.conn = boto.connect_s3()
    print bucket, s3path, toPath
    b = self.conn.get_bucket(bucket)
    for k in b.list(prefix=s3path):
        basename = os.path.basename(k.name)
        fnew = toPath + "/" + basename
        print "copying ", k.name, "to", fnew
        k.get_contents_to_filename(fnew)
def _getDataFiles(self, file_master=0):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    comm = self._comm
    working_dir = self._working_dir
    data_source_bucket = self._datasource_bucket

    if comm.rank == file_master:
        if not op.exists(op.join(working_dir, 'metadata.txt')):
            conn = boto.connect_s3()
            b = conn.get_bucket(data_source_bucket)
            k = Key(b)
            k.key = 'metadata.txt'
            k.get_contents_to_filename(op.join(working_dir, 'metadata.txt'))

    if comm.rank == file_master:
        if not op.exists(op.join(working_dir, 'trimmed_dataframe.pandas')):
            conn = boto.connect_s3()
            b = conn.get_bucket(self._working_bucket)
            k = Key(b)
            k.key = 'trimmed_dataframe.pandas'
            k.get_contents_to_filename(op.join(working_dir, 'trimmed_dataframe.pandas'))
    comm.barrier()
def fetch(self, files, force=False, check=False, verbose=1): assert (self.profile_name or (self.access_key and self.secret_access_key)) files = Fetcher.reformat_files(files) # allows flexibility import boto if self.profile_name is not None: s3 = boto.connect_s3(profile_name=self.profile_name) elif (self.access_key is not None and self.secret_access_key is not None): s3 = boto.connect_s3(self.access_key, self.secret_access_key) bucket_names = np.unique([opts.get('bucket') for f, rk, opts in files]) files_ = [] for bucket_name in bucket_names: # loop over bucket names: efficient if bucket_name: # bucket requested buck = s3.get_bucket(bucket_name) else: # default to first bucket buck = s3.get_all_buckets()[0] for file_, remote_key, opts in files: if opts.get('bucket') != bucket_name: continue # get all files from the current bucket only. target_file = op.join(self.data_dir, file_) key = buck.get_key(remote_key) if not key: warnings.warn('Failed to find key: %s' % remote_key) files_.append(None) else: do_download = force or not op.exists(target_file) try: do_download = (do_download or (check and nib.load( target_file).get_data() is None)) except IOError as ioe: if verbose > 0: print("Warning: %s corrupted, re-downloading " "(Error=%s)" % (target_file, ioe)) do_download = True if do_download: # Ensure destination directory exists destination_dir = op.dirname(target_file) if not op.isdir(destination_dir): if verbose > 0: print("Creating base directory %s" % ( destination_dir)) os.makedirs(destination_dir) if verbose > 0: print("Downloading [%s]/%s to %s." % ( bucket_name or 'default bucket', remote_key, target_file)) with open(target_file, 'wb') as fp: cb = partial(test_cb, t0=time.time()) key.get_contents_to_file(fp, cb=cb, num_cb=None) files_.append(target_file) return files_
def do_connect(self):
    if self.conf.path_style_request:
        calling_format = boto.s3.connection.OrdinaryCallingFormat()
    else:
        calling_format = boto.s3.connection.SubdomainCallingFormat()

    if self.conf.host is None:
        # If version 4 signature is used, boto requires the 'host' parameter.
        # There is also a bug in AWS Frankfurt that causes boto not to work.
        # The current workaround is to give a specific service address, like
        # s3.eu-central-1.amazonaws.com instead of s3.amazonaws.com.
        if self.conf.use_v4_sig:
            self.conn = boto.connect_s3(self.conf.key_id, self.conf.key,
                                        host='s3.%s.amazonaws.com' % self.conf.aws_region,
                                        is_secure=self.conf.use_https,
                                        calling_format=calling_format)
        else:
            self.conn = boto.connect_s3(self.conf.key_id, self.conf.key,
                                        is_secure=self.conf.use_https,
                                        calling_format=calling_format)
    else:
        self.conn = boto.connect_s3(self.conf.key_id, self.conf.key,
                                    host='%s' % self.conf.host,
                                    port=self.conf.port,
                                    is_secure=self.conf.use_https,
                                    calling_format=calling_format)
    self.bucket = self.conn.get_bucket(self.conf.bucket_name)
def _get_ref_from_galaxy_loc(name, genome_build, loc_file, galaxy_dt, need_remap,
                             galaxy_config, data):
    """Retrieve reference genome file from Galaxy *.loc file.

    Reads from tool_data_table_conf.xml information for the index if it exists,
    otherwise uses heuristics to find line based on most common setups.
    """
    refs = [ref for dbkey, ref in _galaxy_loc_iter(loc_file, galaxy_dt, need_remap)
            if dbkey == genome_build]
    remap_fn = alignment.TOOLS[name].remap_index_fn
    need_remap = remap_fn is not None
    if len(refs) == 0:
        # if we have an S3 connection, try to download
        try:
            import boto
            boto.connect_s3()
        except:
            raise ValueError("Could not find reference genome file %s %s" % (genome_build, name))
        logger.info("Downloading %s %s from AWS" % (genome_build, name))
        cur_ref = _download_prepped_genome(genome_build, data, name, need_remap)
    # allow multiple references in a file and use the most recently added
    else:
        cur_ref = refs[-1]
    if need_remap:
        assert remap_fn is not None, "%s requires remapping function from base location file" % name
        cur_ref = os.path.normpath(utils.add_full_path(cur_ref, galaxy_config["tool_data_path"]))
        cur_ref = remap_fn(os.path.abspath(cur_ref))
    return cur_ref
def copy_s3_file(self, fromPath, bucket, path):
    """copy from local file to S3

    Args:
        fromPath (str): local file
        bucket (str): S3 bucket
        path (str): S3 prefix to add to files
    """
    if self.aws_key:
        self.conn = boto.connect_s3(self.aws_key, self.aws_secret)
    else:
        self.conn = boto.connect_s3()
    b = self.conn.get_bucket(bucket)
    source_size = os.stat(fromPath).st_size

    # Create a multipart upload request
    uploadPath = path
    logger.info("uploading to bucket %s path %s", bucket, uploadPath)
    mp = b.initiate_multipart_upload(uploadPath)
    chunk_size = 10485760
    chunk_count = int(math.ceil(source_size / float(chunk_size)))

    for i in range(chunk_count):
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(fromPath, 'r', offset=offset, bytes=bytes) as fp:
            logger.info("uploading to s3 chunk %d/%d", (i + 1), chunk_count)
            mp.upload_part_from_file(fp, part_num=i + 1)

    # Finish the upload
    logger.info("completing transfer to s3")
    mp.complete_upload()
def download_s3(self, fromPath, toPath):
    """download from S3 to local folder

    Args:
        fromPath (str): S3 URL
        toPath (str): local folder
    """
    if fromPath.startswith("s3n://"):
        noSchemePath = fromPath[6:]
    elif fromPath.startswith("s3://"):
        noSchemePath = fromPath[5:]
    parts = noSchemePath.split('/')
    bucket = parts[0]
    s3path = noSchemePath[len(bucket) + 1:]
    if self.aws_key:
        self.conn = boto.connect_s3(self.aws_key, self.aws_secret)
    else:
        self.conn = boto.connect_s3()
    b = self.conn.get_bucket(bucket)
    for k in b.list(prefix=s3path):
        if not k.name.endswith("/"):
            basename = os.path.basename(k.name)
            fnew = toPath + "/" + basename
            logger.info("copying %s to %s", k.name, fnew)
            k.get_contents_to_filename(fnew)
def rubberjack(ctx, application, organisation, region, sigv4_host, bucket):
    """
    Main entry point into the rubberjack CLI.
    """
    ctx.obj = {}
    ctx.obj['application'] = application
    ctx.obj['organisation'] = organisation
    ctx.obj['region'] = region_from_name(region)
    ctx.obj['application_name'] = application_name = "{organisation}-{application}".format(organisation=organisation, application=application)
    ctx.obj['dev_environment_name'] = "{application_name}-dev".format(application_name=application_name)
    ctx.obj['live_environment_name'] = "{application_name}-live".format(application_name=application_name)

    if bucket is None:
        bucket = "{organisation}-rubberjack-ebdeploy".format(organisation=organisation)
    ctx.obj['bucket'] = bucket

    # boto doesn't use a default of None, it uses NoHostProvided, and I struggled to pass that myself
    if sigv4_host:
        s3 = boto.connect_s3(host=sigv4_host)
    else:
        s3 = boto.connect_s3()
    ctx.obj['s3'] = s3

    ctx.obj['beanstalk'] = boto.beanstalk.layer1.Layer1(region=ctx.obj['region'])
def record_stack(self, stack, destination, credentials):
    """
    S3 implementation. Uploads stack definition to configured S3 bucket.

    :param stack: stack definition
    :type stack: str.
    :param destination: destination to copy stack to
    :type destination: str.
    :param credentials: credentials for copy command
    :type credentials: dict.
    :returns: boolean
    :raises: :class:`pmcf.exceptions.AuditException`
    """
    LOG.info('recording stack definition to s3://%s/%s',
             credentials['audit_output'], destination)
    try:
        s3_conn = None
        if credentials.get('use_iam_profile'):
            s3_conn = boto.connect_s3()
        else:
            s3_conn = boto.connect_s3(
                aws_access_key_id=credentials['access'],
                aws_secret_access_key=credentials['secret']
            )
        bucket = s3_conn.get_bucket(credentials['audit_output'])
        k = boto.s3.key.Key(bucket)
        k.key = destination
        k.set_contents_from_string(stack)
    except (boto.exception.S3ResponseError,
            boto.exception.BotoServerError), exc:
        raise AuditException(exc)
def upload_s3():
    """ Upload jar file to s3 """
    source_path = JARFILE
    source_size = os.stat(source_path).st_size

    # create bucket
    import boto
    conn = boto.connect_s3()
    bucket = conn.create_bucket(S3_BUCKET)

    # upload
    c = boto.connect_s3()
    b = c.get_bucket(S3_BUCKET)

    # Create a multipart upload request
    mp = b.initiate_multipart_upload(os.path.basename(source_path))

    # Use a chunk size of 5 MiB
    chunk_size = 5242880
    chunk_count = int(math.ceil(source_size / float(chunk_size)))

    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)

    # Finish the upload
    mp.complete_upload()
    print("Jar uploaded to S3 bucket " + S3_BUCKET)
def __init__(self, username, access_key, secret_key, rate_limit=None, host=None):
    self.username = username
    self.access_key = access_key
    self.secret_key = secret_key
    self.rate_limit = rate_limit
    self.rate_limiter = RateLimiter(self.rate_limit)
    self.callbacks = CallbackAggregator()
    self.multipart_status_callbacks = CallbackAggregator()
    self.host = host
    self.logger = logging.getLogger(__name__)
    if self.rate_limit:
        self.callbacks.add_callback(self.rate_limiter)
    if self.host == 's3.amazonaws.com':
        self.connection = boto.connect_s3(self.access_key, self.secret_key)
    else:
        self.connection = boto.connect_s3(aws_access_key_id=self.access_key,
                                          aws_secret_access_key=self.secret_key,
                                          is_secure=False,
                                          host=self.host,
                                          port=8773,
                                          calling_format=boto_s3_connection_class.OrdinaryCallingFormat(),
                                          path="/services/Walrus")
    if not self.connection:
        raise S3AuthError("check access_key and secret_key")
    self.bucket = self.connection.lookup(username)
    if not self.bucket:
        raise S3AuthError("check access_key and secret_key")
def Storage(accessKey, storageKey):
    # Storage bucket
    storageConn = get_storage_connection(accessKey, storageKey)
    storageTable = PrettyTable(["Storage Buckets", "Total Size"])
    storageTable.align["Storage Buckets"] = "l"
    storageTable.padding_width = 1

    boto.connect_s3(calling_format=OrdinaryCallingFormat())
    reservationStorage = storageConn.get_all_buckets()

    totalSize = 0
    for bucket in reservationStorage:
        s3Bucket = bucket.name
        if (bucket.name != 'esbundles-c14334.Ubuntu-1204-PE-300-agent'):
            for key in bucket.list():
                size = key.size
                totalSize = totalSize + size
        storageTable.add_row([s3Bucket, ""])

    #print 'TotalSize: ' + str(totalSize/1073741824) + ' GB'
    totalSize = totalSize / 1073741824
    storageTable.add_row(["---------------------------------------------------", "------------"])
    storageTable.add_row(["Total Size in GB", totalSize])
    print storageTable

    file2write.writelines('\n')
    file2write.writelines(str(storageTable))
def _get_data_files(self):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    s2f = self._s3_to_fname
    while not op.exists(op.join(self.working_dir, s2f(self.meta_file))):
        try:
            conn = boto.connect_s3()
            b = conn.get_bucket(self.ds_bucket)
            k = Key(b)
            k.key = self.meta_file
            k.get_contents_to_filename(op.join(self.working_dir, s2f(self.meta_file)))
        except:
            time.sleep(random.random())
    while not op.exists(op.join(self.working_dir, s2f(self.data_file))):
        conn = boto.connect_s3()
        try:
            b = conn.get_bucket(self.ds_bucket)
            k = Key(b)
            k.key = self.data_file
            k.get_contents_to_filename(op.join(self.working_dir, s2f(self.data_file)))
        except S3ResponseError:
            self.logger.exception('Master has not generated files')
            raise
        except OSError:
            time.sleep(random.random())
def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None):
    # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
    # are environment variables
    parsed_url = parse_url(filepath_or_buffer)
    s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')

    try:
        conn = boto.connect_s3(host=s3_host)
    except boto.exception.NoAuthHandlerFound:
        conn = boto.connect_s3(host=s3_host, anon=True)

    b = conn.get_bucket(parsed_url.netloc, validate=False)
    if compat.PY2 and (compression == 'gzip' or
                       (compression == 'infer' and
                        filepath_or_buffer.endswith(".gz"))):
        k = boto.s3.key.Key(b, parsed_url.path)
        filepath_or_buffer = BytesIO(k.get_contents_as_string(encoding=encoding))
    else:
        k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
        k.open('r')  # Expose read errors immediately
        filepath_or_buffer = k
    return filepath_or_buffer, None, compression
def get_s3_connection(aws_access_key_id=None, aws_secret_access_key=None,
                      anon=False, profile_name=None, **kwargs):
    import boto

    if profile_name:
        return boto.connect_s3(profile_name=profile_name)

    cfg = boto.Config()
    if aws_access_key_id is None:
        aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    if aws_access_key_id is None:
        aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
    if aws_secret_access_key is None:
        aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')
    if aws_secret_access_key is None:
        aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

    # anon is False but we didn't provide any credentials so try anonymously
    anon = (not anon and
            aws_access_key_id is None and
            aws_secret_access_key is None)
    return boto.connect_s3(aws_access_key_id, aws_secret_access_key, anon=anon)
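# A brief usage sketch of get_s3_connection (the profile name below is a
# hypothetical example); with no profile and no credentials found in the boto
# config or the environment, the function falls back to an anonymous connection:
conn = get_s3_connection(profile_name='analytics')  # hypothetical named profile
anon_conn = get_s3_connection()                      # no credentials -> anon=True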
def load(self, url, offset, length):
    if not s3_avail:  #pragma: no cover
        raise IOError('To load from s3 paths, ' +
                      'you must install boto: pip install boto')

    aws_access_key_id = self.aws_access_key_id
    aws_secret_access_key = self.aws_secret_access_key

    parts = urlsplit(url)

    if parts.username and parts.password:
        aws_access_key_id = unquote_plus(parts.username)
        aws_secret_access_key = unquote_plus(parts.password)
        bucket_name = parts.netloc.split('@', 1)[-1]
    else:
        bucket_name = parts.netloc

    if not self.s3conn:
        try:
            self.s3conn = connect_s3(aws_access_key_id, aws_secret_access_key)
        except Exception:  #pragma: no cover
            self.s3conn = connect_s3(anon=True)

    bucket = self.s3conn.get_bucket(bucket_name)
    key = bucket.get_key(parts.path)

    if offset == 0 and length == -1:
        headers = {}
    else:
        headers = {'Range': BlockLoader._make_range_header(offset, length)}

    # Read range
    key.open_read(headers=headers)
    return key
def configure(self):
    if settings.AWS_ENABLED:
        if settings.AWS_S3_FAKE_S3 is None:
            # The host must be manually specified in Python 2.7.9+ due to a bug in
            # boto with dots in bucket names: https://github.com/boto/boto/issues/2836
            host = settings.AWS_S3_HOST if settings.AWS_S3_HOST else NoHostProvided

            self.s3 = boto.connect_s3(
                settings.AWS_ACCESS_KEY_ID,
                settings.AWS_SECRET_ACCESS_KEY,
                host=host,
                calling_format=OrdinaryCallingFormat()
            )
        else:
            host, port = (settings.AWS_S3_FAKE_S3.split(':', 2) + [80])[:2]
            port = int(port)
            self.s3 = boto.connect_s3("key_id", "secret_key", is_secure=False, port=port,
                                      host=host, calling_format=OrdinaryCallingFormat())
            _ensure_bucket_exists(self.s3, settings.AWS_S3_SOURCE_BUCKET)
            _ensure_bucket_exists(self.s3, settings.AWS_S3_EXPORT_BUCKET)
            _ensure_bucket_exists(self.s3, settings.AWS_S3_BUILDS_BUCKET)

        self.buckets = {
            'source': self.s3.get_bucket(settings.AWS_S3_SOURCE_BUCKET),
            'export': self.s3.get_bucket(settings.AWS_S3_EXPORT_BUCKET),
            'builds': self.s3.get_bucket(settings.AWS_S3_BUILDS_BUCKET),
        }
        self.configured = True
    else:
        self.s3 = None
        self.buckets = None
def deploy(self):
    """
    Deploy this quiz JSON to S3.
    """
    if not self.slug:
        return

    data = json.dumps(self.flatten())
    s3 = boto.connect_s3()

    gzip_buffer = StringIO()
    with gzip.GzipFile(fileobj=gzip_buffer, mode='w') as f:
        f.write(data)
    data = gzip_buffer.getvalue()

    s3 = boto.connect_s3()

    for bucket_name in app_config.S3_BUCKETS:
        bucket = s3.get_bucket(bucket_name)
        k = Key(bucket, '%s/live-data/games/%s.json' % (app_config.PROJECT_SLUG, self.slug))
        k.set_contents_from_string(data, headers={
            'Content-Type': 'application/json',
            'Content-Encoding': 'gzip',
            'Cache-Control': 'max-age=5'
        })
        k.set_acl('public-read')
def connect(aws_access_key_id=None, aws_secret_access_key=None):
    """
    uses aws_access_key_id and aws_secret_access_key if available, or falls back
    on the ENV variables AWS_ACCESS_KEY_ID && AWS_SECRET_ACCESS_KEY
    """
    if aws_access_key_id is None or aws_secret_access_key is None:
        # no explicit keys supplied; let boto read credentials from the environment
        log.debug('Falling back to ENV variables $AWS_ACCESS_KEY && $AWS_SECRET_KEY')
        return boto.connect_s3()
    else:
        return boto.connect_s3(aws_access_key_id, aws_secret_access_key)
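# A usage sketch of connect (the key values are hypothetical placeholders);
# calling it with no arguments lets boto pick up credentials from the environment:
conn = connect('AKIAEXAMPLE', 'example-secret')  # explicit keys (hypothetical)
conn = connect()                                 # falls back to ENV variables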
def get_s3_bucket(bucket_name, config):
    if bucket_name not in S3_BUCKETS:
        if config.aws_access_key and config.aws_secret_key:
            s3conn = boto.connect_s3(config.aws_access_key, config.aws_secret_key)
        else:
            s3conn = boto.connect_s3()  # use local boto config or IAM profile
        S3_BUCKETS[bucket_name] = s3conn.get_bucket(bucket_name)
    return S3_BUCKETS[bucket_name]
def s3(self):
    if not self.__s3_conn:
        if self.config.AWS:
            self.__s3_conn = boto.connect_s3(self.config.AWS["key"], self.config.AWS["secret_key"])
        else:
            self.__s3_conn = boto.connect_s3()
    return self.__s3_conn
def home(request): global location userInput = UserInputForm if 'import' in request.POST: newdoc = Document(docfile = request.FILES['upload']) newdoc.save() location = newdoc.path() # creating S3 bucket connection conn = boto.connect_s3('AKIAIJZ56E33VC2GBG3Q', 'xfSWxuK9uGAsRwtwdJgIPBhiye0Z3ka5oRqRa8FD') bucket = conn.create_bucket('client1.bucket') k = Key(bucket) filename = str(request.FILES['upload']) filenameKey = re.sub('\.txt$', '', filename) print filenameKey k.key = filenameKey k.set_contents_from_filename(location) return HttpResponseRedirect(reverse('upload.views.home')) else: form = DocumentForm() # An empty, unbound form if 'user_input' in request.POST: form = UserInputForm(request.POST) if form.is_valid(): form.save() myfile = open(os.path.dirname(os.path.abspath(upload.__file__))+ "/media/Watchlists/userinput.txt", 'a') myfile.write(request.POST['keyword'] + "\n") myfile.close() location = os.path.dirname(os.path.abspath(upload.__file__))+ "/media/Watchlists/userinput.txt" conn = boto.connect_s3('AKIAIJZ56E33VC2GBG3Q', 'xfSWxuK9uGAsRwtwdJgIPBhiye0Z3ka5oRqRa8FD') bucket = conn.create_bucket('client1.bucket') k = Key(bucket) filenameKey = "userinput" print filenameKey k.key = filenameKey k.set_contents_from_filename(location) return HttpResponseRedirect(reverse('upload.views.home')) else: form = UserInputForm() # Load documents for the list page documents = Document.objects.all() # Rendner list page with the documents and the form return render_to_response( 'upload/parallax.html', {'documents': documents, 'form' : form, 'userInput' : userInput}, context_instance = RequestContext(request) )
def __init__(self, mega_stack_name, name, params, template_name, region, sns_topic_arn, tags=None, depends_on=None): self.logger = logging.getLogger(__name__) if mega_stack_name == name: self.cf_stack_name = name else: self.cf_stack_name = "%s-%s" % (mega_stack_name, name) self.mega_stack_name = mega_stack_name self.name = name self.yaml_params = params self.params = {} self.template_name = template_name self.template_body = '' self.template_url = False if depends_on is None: self.depends_on = None else: self.depends_on = [] for dep in depends_on: if dep == mega_stack_name: self.depends_on.append(dep) else: self.depends_on.append("%s-%s" % (mega_stack_name, dep)) self.region = region self.sns_topic_arn = sns_topic_arn # Safer than setting default value for tags = {} if tags is None: self.tags = {} else: self.tags = tags try: # catch S3 url template names m = re.match(r'(https?|s3)://([^/]+)/(.+$)', self.template_name) if m: protocol, bucket, key = m.groups() if protocol == 's3': connect_s3().get_bucket(bucket).get_key(key).read() else: if not requests.get(self.template_name).ok: raise Exception else: open(self.template_name, 'r') except: self.logger.critical("Failed to open template file %s for stack %s" % (self.template_name, self.name)) exit(1) # check params is a dict if set if self.yaml_params and type(self.yaml_params) is not dict: self.logger.critical( "Parameters for stack %s must be of type dict not %s", self.name, type(self.yaml_params)) exit(1) self.cf_stacks = {} self.cf_stacks_resources = {}
def upload_to_s3(bucketname, tiles): # pragma: no cover tiles = os.path.abspath(tiles) conn = boto.connect_s3() bucket = conn.get_bucket(bucketname, validate=False) result = { 'tile_changed': 0, 'tile_deleted': 0, 'tile_unchanged': 0, 'tile_new': 0, 's3_put': 0, 's3_list': 0, } def _key(name): try: return int(name) except Exception: return -1 for name in sorted(os.listdir(tiles), key=_key): folder = os.path.join(tiles, name) if not os.path.isdir(folder): continue for root, dirs, files in os.walk(folder): rel_root = 'tiles/' + root.lstrip(tiles) + '/' rel_root_len = len(rel_root) filtered_files = [f for f in files if f.endswith('.png')] if not filtered_files: continue # get all the keys keys = {} result['s3_list'] += 1 for key in bucket.list(prefix=rel_root): rel_name = key.name[rel_root_len:] keys[rel_name] = key for f in filtered_files: filename = root + os.sep + f keyname = rel_root + f key = keys.pop(f, None) changed = True if key is not None: if os.path.getsize(filename) != key.size: # do the file sizes match? changed = True else: remote_md5 = key.etag.strip('"') with open(filename, 'rb') as fd: local_md5 = hashlib.md5(fd.read()).hexdigest() if local_md5 == remote_md5: # do the md5/etags match? changed = False if changed: if key is None: result['tile_new'] += 1 key = boto.s3.key.Key(bucket) key.key = keyname else: result['tile_changed'] += 1 # upload or update the key result['s3_put'] += 1 key.set_contents_from_filename(filename, headers=IMAGE_HEADERS, reduced_redundancy=True) else: result['tile_unchanged'] += 1 # delete orphaned files for rel_name, key in keys.items(): result['tile_deleted'] += 1 key.delete() # Update status file data = {'updated': util.utcnow().isoformat()} k = boto.s3.key.Key(bucket) k.key = 'tiles/data.json' k.set_contents_from_string(dumps(data), headers=JSON_HEADERS, reduced_redundancy=True) return result
def create_bucket(self):
    self.conn = boto.connect_s3()
    self.conn.create_bucket(BUCKET)
def convertvid(ytid, options, convertedname, taskid, fetchandconvert, duration, id3): tmpfile = "/mnt/%s" % (taskid) tmpconverted = "/mnt/%s" % (convertedname) start = getseconds("00:" + options["youtube_start"]) end = getseconds("00:" + options["youtube_end"]) duration = end - start conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY) #1) check if file exists on local disk if os.path.isfile(tmpfile) == False: # 1.1) get file from S3 bucket = conn.get_bucket(settings.S3_YT_RAW_BUCKET) k = Key(bucket) k.key = ytid k.get_contents_to_filename(tmpfile) # 2) Run ffmpeg - ffmpeg -i input.flv -ab 128k output.mp3 #todo: use actual options for transcoding quality = options['transcoder_quality'] + 'k' extraargs = [] format = tmpconverted[-3:] if format == "m4r": # m4a is same as m4r tmpconverted = tmpconverted[:-3] + "m4a" extraargs = ["-vn"] if format == "mp4": # use MP4Box to do the job, since if the original video has a not 1:1 PAR, and # has a small width than 360, the script will fail, so we use MP4Box instead import shutil shutil.move(tmpfile, tmpfile + ".mp4") tmpfile = tmpfile + ".mp4" ffmpegcmd = "MP4Box -add %s %s" % (tmpfile, tmpconverted) elif format == "flv" or format == "wmv": # These are video formats ffmpegcmd = " ".join([ "ffmpeg", "-ss", str(start), "-i", tmpfile, "-y", "-t", str(duration), "-vf", '"scale=-1:360"' ] + extraargs + [tmpconverted]) elif format == "avi": ffmpegcmd = " ".join([ "ffmpeg", "-ss", str(start), "-i", tmpfile, "-y", "-t", str(duration), "-ab", quality, "-vf", '"scale=-1:360"' ] + extraargs + [tmpconverted]) elif format == "wav": ffmpegcmd = " ".join([ "ffmpeg", "-ss", str(start), "-i", tmpfile, "-y", "-t", str(duration) ] + extraargs + [tmpconverted]) else: #audio only ffmpegcmd = " ".join([ "ffmpeg", "-ss", str(start), "-i", tmpfile, "-y", "-t", str(duration), "-ab", quality ] + extraargs + [tmpconverted]) print ffmpegcmd thread = pexpect.spawn(ffmpegcmd) cpl = thread.compile_pattern_list([pexpect.EOF, ".*time=([^\s]*)", '(.+)']) while True: i = thread.expect_list(cpl, timeout=None) if i == 0: # EOF print "the sub process exited" break elif i == 1: timestamp = thread.match.group(1) print timestamp t = getseconds(timestamp) print t, duration pct = 30 + ((t / duration) * 60) fetchandconvert.update_state(state="CONVERTING", meta={"progress": int(pct)}) thread.close #p = subprocess.Popen(["ffmpeg", "-i", tmpfile, "-y", "-ab", quality] + extraargs + [tmpconverted], stdout=subprocess.PIPE) #com = p.communicate() #print com # 3) Store output into S3 # === POSTPROCESSING === # ID3 tags if id3 is not None: audio = EasyID3(tmpconverted) for key in id3: audio[key] = id3[key] audio.save() bucket = conn.get_bucket(settings.S3_YT_PRO_BUCKET) k = Key(bucket) k.key = convertedname k.set_contents_from_filename(tmpconverted) newfile = bucket.get_key(k.key) newfile.change_storage_class('REDUCED_REDUNDANCY') # 4) Return pass
def main(argv): parser = OptionParser(usage="usage: %prog [options]", version="%prog 1.0") parser.add_option('-b', '--bucket', dest='logBucket', type='string', \ help='Specify the S3 bucket containing AWS logs') parser.add_option('-d', '--debug', action='store_true', dest='debug', \ help='Increase verbosity') parser.add_option('-l', '--log', dest='logFile', type='string', \ help='Local log file') parser.add_option('-j', '--json', action='store_true', dest='dumpJson', \ help='Reformat JSON message (default: raw)') #Beware, once you delete history it's gone. parser.add_option('-D', '--delete', action='store_true', dest='deleteFile', \ help='Delete processed files from the AWS S3 bucket') parser.add_option('-s', '--state', dest='state', type='string', \ help="State file for keeping track of what logs you already processed.") (options, args) = parser.parse_args() state_tracker = None if options.debug: print '+++ Debug mode on' if options.logBucket == None: print 'ERROR: Missing an AWS S3 bucket! (-b flag)' sys.exit(1) if options.logFile == None: print 'ERROR: Missing a local log file! (-l flag)' sys.exit(1) if options.state: import sqlite3 try: state_tracker = sqlite3.connect(options.state) state_tracker.execute("select count(*) from log_progress") except sqlite3.OperationalError: state_tracker.execute( "create table log_progress (log_name 'text' primary key, processed_date 'TEXT')" ) if options.debug: print '+++ Connecting to Amazon S3' s3 = boto.connect_s3() c = s3.get_bucket(options.logBucket) try: c = s3.get_bucket(options.logBucket) except boto.exception.S3ResponseError as e: print "Bucket %s access error: %s" % (options.logBucket, e) sys.exit(3) for f in c.list(): newFile = os.path.basename(str(f.key)) if re.match('.+_CloudTrail-Digest_.+', newFile): if options.debug: print "Skipping digest file: %s" % newFile continue if newFile != "": if already_processed(newFile, state_tracker): if options.debug: print "Skipping previously seen file {file}".format( file=newFile) continue if options.debug: print "+++ Found new log: ", newFile f.get_contents_to_filename(newFile) data = gzip.open(newFile, 'rb') try: log = open(options.logFile, 'ab') except IOError as e: print "ERROR: Cannot open %s (%s)" % (options.logFile, e.strerror) sys.exit(1) if options.dumpJson == None: log.write(data.read()) log.write("\n") else: j = json.load(data) if "Records" not in j: continue for json_event in j["Records"]: new_dict = {} for key in json_event: if json_event[key]: new_dict[key] = json_event[key] new_dict['log_file'] = newFile aws_log = {'aws': new_dict} # Copy 'aws.sourceIPAddress' and 'aws.userIdentity.userName' to standard fields 'srcip' and 'user' so 'srcip' can be used in Wazuh GeoIP lookups and <same_user /> and <same_source_ip /> can be used in composite rules. if 'sourceIPAddress' in aws_log["aws"]: aws_log["srcip"] = aws_log["aws"]["sourceIPAddress"] if 'userIdentity' in aws_log[ "aws"] and 'userName' in aws_log["aws"][ "userIdentity"]: aws_log["user"] = aws_log["aws"]["userIdentity"][ "userName"] log.write("{0}\n".format(json.dumps(aws_log))) log.close() try: os.remove(newFile) except IOError as e: print "ERROR: Cannot delete %s (%s)" % (newFile, e.strerror) if options.deleteFile: c.delete_key(f.key) mark_complete(newFile, state_tracker)
def test_delete_missing_key():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket('foobar')

    deleted_key = bucket.delete_key("foobar")
    deleted_key.key.should.equal("foobar")
def becomeJester(): dirname = os.getcwd() f = open(f"{dirname}/linodeCreds.txt") access_key = f.readline() secret_key = f.readline() f.close() access_key = access_key.strip() secret_key = secret_key.strip() conn = boto.connect_s3( aws_access_key_id = access_key, aws_secret_access_key = secret_key, host = 'us-east-1.linodeobjects.com', #is_secure=False, # uncomment if you are not using ssl calling_format = boto.s3.connection.OrdinaryCallingFormat(), ) for bucket in conn.get_all_buckets(): name = bucket.name, created = bucket.creation_date, print (f"{name}\t{created}") for key in bucket.list(): #print(key.get_acl()) name = key.name, size = key.size, modified = key.last_modified, print (f"{name}\t{size}\t{modified}") try: if not os.path.isfile(f'{dirname}/scavenger-bucket/{name[0]}'): try: #create folder if "/" in name[0]: folderPath = name[0].split('/') print("folderPath: ") print(folderPath[0]) os.makedirs(f'{dirname}/scavenger-bucket/{folderPath[0]}') #then add file key.get_contents_to_filename(f'{dirname}/scavenger-bucket/{folderPath[0]}{folderPath[1]}') else: key.get_contents_to_filename(f'{dirname}/scavenger-bucket/{name[0]}') except: pass except: pass #get most recent file list_of_files = glob.glob(f'{dirname}/scavenger-bucket/*') # * means all if need specific format then *.csv latest_file = max(list_of_files, key=os.path.getctime) #print (latest_file) j = open(latest_file) jesterCommand = j.readline() j.close() print(latest_file) print(jesterCommand) return(jesterCommand)
p = Popen(dumpCommand, shell=True, stdout=PIPE)
with gzip.open(mysqlfile, "wb") as f:
    f.writelines(p.stdout)
exitcode = p.wait()
print("exitcode = " + str(exitcode))
currentTime = datetime.now()
if 0 == exitcode:
    print(" backup completed at : " + str(currentTime) + ' starting upload')
    # we get an exception if the upload fails
    try:
        s3Conn = boto.connect_s3(os.environ['S3_ACCESSID'], os.environ['S3_ACCESSKEY'])
        bucket = s3Conn.get_bucket(os.environ['S3_BUCKET'])
        s3Key = Key(bucket)
        s3Key.key = 'dbbackup/' + os.environ['BACKUP_NAME'] + '/' + fileNameOnly
        print("uploading to :" + os.environ['S3_BUCKET'] + " : " + s3Key.name)
        s3Key.set_contents_from_filename(mysqlfile)
        os.remove(mysqlfile)
        print("back up success")
        break
    except Exception, e:
#!/usr/bin/env python
import boto
from boto.s3.key import Key

OrdinaryCallingFormat = boto.config.get(
    's3', 'calling_format', 'boto.s3.connection.OrdinaryCallingFormat')

s3 = boto.connect_s3(host='localhost', port=10001,
                     calling_format=OrdinaryCallingFormat, is_secure=False)
b = s3.create_bucket('mocking')

keys = b.get_all_keys(prefix='level')
print 'TEST 1'
for key in keys:
    print repr(key)

keys = b.get_all_keys(max_keys=2)
print 'TEST 2'
for key in keys:
    print repr(key)
import boto

keyId = "your_aws_access_key_id"
sKeyId = "your_aws_secret_key_id"

conn = boto.connect_s3(keyId, sKeyId)
srcBucket = conn.get_bucket('mybucket001')  # Source Bucket Object
dstBucket = conn.get_bucket('mybucket002')  # Destination Bucket Object
fileName = "abc.txt"

# Call the copy_key() from destination bucket
dstBucket.copy_key(fileName, srcBucket.name, fileName)
def _get_bucket(self, bucket_name, validate=False):
    s3 = boto.connect_s3(g.S3KEY_ID or None, g.S3SECRET_KEY or None)
    bucket = s3.get_bucket(bucket_name, validate=validate)
    return bucket
def saveRecentResults(electionID, idList, timestamp): # check if file exists already if os.path.exists('recentResults.json'): print "Results file exists, updating" with open('recentResults.json', 'r') as recentResultsFile: # Convert the results to a list of datetime objects tempList = [] recentResults = json.load(recentResultsFile) print "oldresults", recentResults for result in recentResults[electionID]: tempList.append(datetime.strptime(result, "%Y%m%d%H%M%S")) # Sort it tempList.sort(reverse=True) # Check if it's less than 20 and append the new timestamp if len(tempList) < 20: print "Less than twenty results, appending latest now" tempList.append(datetime.strptime(timestamp, "%Y%m%d%H%M%S")) tempList.sort(reverse=True) for i in xrange(0, len(tempList)): tempList[i] = datetime.strftime(tempList[i], '%Y%m%d%H%M%S') recentResults[electionID] = tempList # If it's 20, remove the oldest timestamp, then append the new one elif len(tempList) == 20: print "Twenty results, removing oldest and appending newest" del tempList[-1] tempList.append(datetime.strptime(timestamp, "%Y%m%d%H%M%S")) tempList.sort(reverse=True) for i in xrange(0, len(tempList)): tempList[i] = datetime.strftime(tempList[i], '%Y%m%d%H%M%S') recentResults[electionID] = tempList # Write the new version print "newresults", recentResults newJson = json.dumps(recentResults, indent=4) with open('recentResults.json', 'w') as fileOut: fileOut.write(newJson) print "Finished saving results log locally" print "Connecting to S3" conn = boto.connect_s3(AWS_KEY, AWS_SECRET) bucket = conn.get_bucket('gdn-cdn') from boto.s3.key import Key k = Key(bucket) k.key = "2018/07/aus-byelections/recentResults.json".format( timestamp=timestamp) k.set_metadata("Cache-Control", "max-age=180") k.set_metadata("Content-Type", "application/json") k.set_contents_from_string(newJson) k.set_acl("public-read") print "Done, JSON is updated" # Otherwise start a new file else: print "No results file, making one now" # electionIDs = ['22692','22693','22694','22695','22696'] # testIDs = ['21364','21379'] jsonObj = {} for id in idList: jsonObj[id] = [] jsonObj[electionID].append(timestamp) newJson = json.dumps(jsonObj, indent=4) with open('recentResults.json', 'w') as fileOut: fileOut.write(newJson) print "Finished creating results log" print "Connecting to S3" conn = boto.connect_s3(AWS_KEY, AWS_SECRET) bucket = conn.get_bucket('gdn-cdn') from boto.s3.key import Key k = Key(bucket) k.key = "2018/07/aus-byelections/recentResults.json".format( timestamp=timestamp) k.set_metadata("Cache-Control", "max-age=90") k.set_metadata("Content-Type", "application/json") k.set_contents_from_string(newJson) k.set_acl("public-read") print "Done, JSON is updated"
def test_missing_key_urllib2():
    conn = boto.connect_s3('the_key', 'the_secret')
    conn.create_bucket("foobar")

    urlopen.when.called_with("http://foobar.s3.amazonaws.com/the-key").should.throw(HTTPError)
def get_bucket_size(self):
    try:
        s3conn = boto.connect_s3()
    except boto.exception.BotoServerError, e:
        print e
        sys.exit(1)
#!/usr/bin/env python3
import os
import boto
from flask import Flask
from config import ecs_test_drive

app = Flask(__name__)

#### Get ECS credentials from external config file
ecs_access_key_id = ecs_test_drive['ecs_access_key_id']
ecs_secret_key = ecs_test_drive['ecs_secret_key']
bucket_name = ecs_test_drive['bucket_name']

## Open a session with your ECS
session = boto.connect_s3(ecs_access_key_id, ecs_secret_key, host='object.ecstestdrive.com')

## Get hold of your bucket
b = session.get_bucket(bucket_name)

print("ECS connection is: " + str(session))
print("Bucket is: " + str(b))
print("Uploading photos ...")

## Create a list of filenames in "photos" to upload to ECS
for each_photo in os.listdir("photos"):
    print("Uploading " + str(each_photo))
    k = b.new_key(each_photo)
    src = os.path.join("photos", each_photo)
    k.set_contents_from_filename(src)
    k.set_acl('public-read')
# /bin/python
import boto
import boto.s3.connection
import glob
import ntpath
import socket

#import key
from key import *

listing = glob.glob('/var/log/ceph/*.log')
for files in listing:
    if "radosgw" in files:
        filename = ntpath.basename(files) + "-" + socket.gethostname()
    else:
        filename = ntpath.basename(files)
    print filename
    print files
    conn = boto.connect_s3(aws_access_key_id=access_key,
                           aws_secret_access_key=secret_key,
                           port=port_number,
                           debug=2,
                           host=hostname,
                           is_secure=False,
                           calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = conn.create_bucket('log_bucket')
    key = bucket.new_key(filename)
    key.set_contents_from_filename(files)
def run(args): utils.pessimistic_connection_handling() # Setting up logging log = os.path.expanduser(configuration.get('core', 'BASE_LOG_FOLDER')) directory = log + "/{args.dag_id}/{args.task_id}".format(args=args) if not os.path.exists(directory): os.makedirs(directory) args.execution_date = dateutil.parser.parse(args.execution_date) iso = args.execution_date.isoformat() filename = "{directory}/{iso}".format(**locals()) # store old log (to help with S3 appends) if os.path.exists(filename): with open(filename, 'r') as logfile: old_log = logfile.read() else: old_log = None subdir = process_subdir(args.subdir) logging.root.handlers = [] logging.basicConfig(filename=filename, level=settings.LOGGING_LEVEL, format=settings.LOG_FORMAT) if not args.pickle: dagbag = DagBag(subdir) if args.dag_id not in dagbag.dags: msg = 'DAG [{0}] could not be found in {1}'.format( args.dag_id, subdir) logging.error(msg) raise AirflowException(msg) dag = dagbag.dags[args.dag_id] task = dag.get_task(task_id=args.task_id) else: session = settings.Session() logging.info('Loading pickle id {args.pickle}'.format(**locals())) dag_pickle = session.query(DagPickle).filter( DagPickle.id == args.pickle).first() if not dag_pickle: raise AirflowException("Who hid the pickle!? [missing pickle]") dag = dag_pickle.pickle task = dag.get_task(task_id=args.task_id) task_start_date = None if args.task_start_date: task_start_date = dateutil.parser.parse(args.task_start_date) task.start_date = task_start_date ti = TaskInstance(task, args.execution_date) if args.local: print("Logging into: " + filename) run_job = jobs.LocalTaskJob( task_instance=ti, mark_success=args.mark_success, force=args.force, pickle_id=args.pickle, task_start_date=task_start_date, ignore_dependencies=args.ignore_dependencies, pool=args.pool) run_job.run() elif args.raw: ti.run( mark_success=args.mark_success, force=args.force, ignore_dependencies=args.ignore_dependencies, job_id=args.job_id, pool=args.pool, ) else: pickle_id = None if args.ship_dag: try: # Running remotely, so pickling the DAG session = settings.Session() pickle = DagPickle(dag) session.add(pickle) session.commit() pickle_id = pickle.id print(('Pickled dag {dag} ' 'as pickle_id:{pickle_id}').format(**locals())) except Exception as e: print('Could not pickle the DAG') print(e) raise e executor = DEFAULT_EXECUTOR executor.start() print("Sending to executor.") executor.queue_task_instance( ti, mark_success=args.mark_success, pickle_id=pickle_id, ignore_dependencies=args.ignore_dependencies, force=args.force) executor.heartbeat() executor.end() if configuration.get('core', 'S3_LOG_FOLDER').startswith('s3:'): import boto s3_log = filename.replace(log, configuration.get('core', 'S3_LOG_FOLDER')) bucket, key = s3_log.lstrip('s3:/').split('/', 1) if os.path.exists(filename): # get logs with open(filename, 'r') as logfile: new_log = logfile.read() # remove old logs (since they are already in S3) if old_log: new_log.replace(old_log, '') try: s3 = boto.connect_s3() s3_key = boto.s3.key.Key(s3.get_bucket(bucket), key) # append new logs to old S3 logs, if available if s3_key.exists(): old_s3_log = s3_key.get_contents_as_string().decode() new_log = old_s3_log + '\n' + new_log # send log to S3 encrypt = configuration.get('core', 'ENCRYPT_S3_LOGS') s3_key.set_contents_from_string(new_log, encrypt_key=encrypt) except: print('Could not send logs to S3.')
def get_conn(accesskey, secretkey):
    if accesskey and secretkey:
        return boto.connect_s3(accesskey, secretkey)
    else:
        return boto.connect_s3()
#!/usr/bin/python
#
# TimeLapse Downloader
#
import time
import boto
import subprocess
from boto.s3.key import Key

# get access to S3
conn = boto.connect_s3('access_key', 'secret_key')

# our bucket name
bucketname = 'plant-photos'

# get access to bucket
bucket = conn.get_bucket(bucketname)
list = bucket.list()

i = 0
for key in list:
    fname = 'images/' + str(i).zfill(6) + '.jpg'
    print fname, key.name
    ky = Key(bucket, key)
    ky.get_contents_to_filename(fname)
    i = i + 1
def main(): # parse options from the command line parser = argparse.ArgumentParser( prog='PROG', formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent('''\ ------------------------------------------------------------------------------------------------------------- This is a deep neural network architecture for training sparse filters. Example uses: $ python test.py $ python test.py -m GroupSF -v 1 -g 3 -s 1 $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100 $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat -i 100 150 -t y -v 1 ------------------------------------------------------------------------------------------------------------- ''') ) parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+', help="the model type") parser.add_argument("-c", "--convolution", default="n", help="convolution, yes or no") parser.add_argument("-f", "--filename", default="patches.mat", help="the data filename") parser.add_argument("-d", "--dimensions", type=int, nargs='+', default=([100, 256]), help="the dimensions of the model: [neurons, input size] or [neurons, length, width]") parser.add_argument("-p", "--pool", type=int, nargs='+', default=None, help="pooling dimensions") parser.add_argument("-g", "--group", type=int, default=None, help="group size") parser.add_argument("-s", "--step", type=int, default=None, help="step size") parser.add_argument("-l", "--learn_rate", type=float, default=.001, help="learning rate") parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100], help="number of iterations") parser.add_argument("-v", "--verbosity", type=int, default=0, help="verbosity: 0 no plot; 1 plots") parser.add_argument("-o", "--opt", default="GD", help="optimization method: GD or L-BFGS") parser.add_argument("-w", "--whitening", default='n', help="whitening: 'y' or 'n'") parser.add_argument("-t", "--test", default='n', help="test classification performance: 'y' or 'n'") parser.add_argument("-a", "--channels", type=int, default=1, help="number of channels in data") parser.add_argument("-e", "--examples", type=int, default=None, help="number of training examples") parser.add_argument("-b", "--batch_size", type=int, default=1000, help="number of examples in [mini]batch") parser.add_argument("-z", "--aws", default='n', help="run on aws: 'y' or 'n'") parser.add_argument("-r", "--random", default='n', help="type of batches: random = 'y'") args = parser.parse_args() args.dimensions = parse_dims(args) args.iterations = parse_iter(args) ''' =================================== Load in the data =================================== ''' # load in data print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", args.filename) data = loadmat(file_path)['X'] # reshape and preprocess data print "pre-processing data ..." 
video = None if args.filename == 'patches_video.mat': video = data data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T if args.convolution == 'n': if args.whitening == 'y': data -= data.mean(axis=0) data = whiten(data.T).T elif args.whitening == 'n' and args.channels == 1: data -= data.mean(axis=0) # elif args.whitening == 'n' and args.channels == 3: # data = np.float32(data) data = np.float32(data.T) elif args.convolution == 'y': if args.filename == 'kyotoData.mat': data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1])))) data = scaling.LCNinput(data, kernel_shape=9) elif args.filename == 'CIFAR_data.mat': data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1])))) data = scaling.LCNinput(data, kernel_shape=5) data = data[0:args.examples, :, :, :] elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat': data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:args.examples, :, :, :] args.channels = data.shape[1] for channel in range(args.channels): data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :]. reshape((data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), ( data.shape[0], data.shape[2], data.shape[3])) # assert that batch size is valid and get number of batches n_batches, rem = divmod(data.shape[0], args.batch_size) assert rem == 0 # other assertions assert len(args.model) == len(args.iterations) if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF': assert args.group is not None assert args.step is not None # assert that the number of neurons in each layer is a perfect square for layer in xrange(len(args.dimensions)): assert np.sqrt(args.dimensions[layer][0]) % np.floor(np.sqrt(args.dimensions[layer][0])) == 0 ''' ============================= Build and train the network ============================= ''' # construct the network print "building model..." model = sf.Network( model_type=args.model, weight_dims=args.dimensions, p=args.pool, group_size=args.group, step=args.step, lr=args.learn_rate, opt=args.opt, c=args.convolution, test=args.test, batch_size=args.batch_size, random=args.random, weights=None ) # TODO: custom learning rates for each layer # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # train the sparse filtering network print "training network..." 
t = time.time()
cost = {}
weights = {}
for l in xrange(model.n_layers):
    cost_layer = []
    w = None

    # iterate over training epochs
    if args.opt == 'GD':
        for epoch in xrange(args.iterations[l]):

            # go through [mini]batches
            for batch_index in xrange(n_batches):
                c, w = train[l](index=batch_index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

    elif args.opt == 'L-BFGS':
        w = minimize(train[l], model.layers[l].w.eval().flatten(),
                     method='L-BFGS-B', jac=True,
                     options={'maxiter': args.iterations[l], 'disp': True})

        if args.convolution == 'n':
            w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
        elif args.convolution == 'y':
            w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                            args.dimensions[0][2], args.dimensions[0][3])

    # add layer cost and weights to the dictionaries
    cost['layer' + str(l)] = cost_layer
    weights['layer' + str(l)] = w

# calculate and display elapsed training time
elapsed = time.time() - t
print('Elapsed training time: %f' % elapsed)

# create sub-folder for saved model
if args.aws == 'n':
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.mkdir(directory_name)
elif args.aws == 'y':
    import boto
    from boto.s3.key import Key

    s3 = boto.connect_s3()
    my_bucket = 'dlacombejr.bucket'
    bucket = s3.get_bucket(my_bucket)
    k = Key(bucket)
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.mkdir(directory_name)

# save the model for later use
full_path = directory_name + '/model.pkl'
pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL)
if args.aws == 'y':
    k.key = full_path
    k.set_contents_from_filename(full_path)
    os.remove(full_path)

# save weights separately
savemat(directory_name + '/weights.mat', weights)
if args.aws == 'y':
    k.key = directory_name + '/weights.mat'
    k.set_contents_from_filename(directory_name + '/weights.mat')
    os.remove(directory_name + '/weights.mat')

# save the cost functions
savemat(directory_name + '/cost.mat', cost)
if args.aws == 'y':
    k.key = directory_name + '/cost.mat'
    k.set_contents_from_filename(directory_name + '/cost.mat')
    os.remove(directory_name + '/cost.mat')

# create log file
log_file = open(directory_name + "/log.txt", "wb")
# todo: create log file by looping through args
# for arg in args:
#     log_file.write(
#         args.
#     )
for m in range(len(args.model)):
    log_file.write(
        "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (
            m, args.model[m], args.dimensions[m], args.iterations[m])
    )
    if args.model == 'GroupSF' or args.model == 'GroupConvolutionalSF':
        log_file.write(
            " Groups: %d \n Step: %d" % (args.group, args.step)
        )

ex = data.shape[0]
if args.examples is not None:
    ex = args.examples
log_file.write(
    " Data-set: %s \n Examples: %6d \n Whitened: %s" % (args.filename, ex, args.whitening)
)
log_file.write('\nElapsed training time: %f' % elapsed)
log_file.close()
if args.aws == 'y':
    k.key = directory_name + "/log.txt"
    k.set_contents_from_filename(directory_name + "/log.txt")
    os.remove(directory_name + "/log.txt")

''' =============================== Verbosity Options ===================================== '''

# get variables and saves
if args.verbosity >= 1:

    # # get variables of interest
    # activations_norm = {}
    # activations_raw = {}
    # activations_shuffled = {}
    # reconstruction = {}
    # error_recon = {}
    # pooled = {}

    # for l in xrange(len(args.dimensions)):
    #     activations_norm['layer' + str(l)] = {}
    #     activations_raw['layer' + str(l)] = {}
    #     activations_shuffled['layer' + str(l)] = {}
    #     reconstruction['layer' + str(l)] = {}
    #     error_recon['layer' + str(l)] = {}
    #     pooled['layer' + str(l)] = {}

    for batch in xrange(n_batches):

        # get variables of interest
        activations_norm = {}
        activations_raw = {}
        activations_shuffled = {}
        reconstruction = {}
        error_recon = {}
        pooled = {}

        # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        begin = batch * args.batch_size
        end = begin + args.batch_size
        f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

        # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat
        # activations_raw['layer' + str(l)]['batch' + str(batch)] = f
        # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled
        # reconstruction['layer' + str(l)]['batch' + str(batch)] = err
        # error_recon['layer' + str(l)]['batch' + str(batch)] = rec
        # pooled['layer' + str(l)]['batch' + str(batch)] = p

        # define [mini]batch title
        batch_title = 'layer' + str(l) + '_batch' + '%03d' % batch

        # define norm and raw file names
        norm_file_name = directory_name + '/activations_norm_' + batch_title + '.mat'
        raw_file_name = directory_name + '/activation_raw_' + batch_title + '.mat'

        activations_norm[batch_title] = f_hat
        activations_raw[batch_title] = f
        activations_shuffled[batch_title] = f_hat_shuffled
        reconstruction[batch_title] = err
        error_recon[batch_title] = rec
        pooled[batch_title] = p

        # save model as well as weights and activations separately
        savemat(norm_file_name, activations_norm)
        # savemat(raw_file_name, activations_raw)

        if args.aws == 'y':
            k.key = norm_file_name
            k.set_contents_from_filename(norm_file_name)
            os.remove(norm_file_name)
            # k.key = raw_file_name
            # k.set_contents_from_filename(raw_file_name)
            # os.remove(raw_file_name)

    # savemat(directory_name + '/weights.mat', weights)
    # if args.aws == 'y':
    #     k.key = directory_name + '/weights.mat'
    #     k.set_contents_from_filename(directory_name + '/weights.mat')
    #     os.remove(directory_name + '/weights.mat')

    # # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
    # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size])
    #
    # activations_norm['layer' + str(l)] = f_hat
    # activations_raw['layer' + str(l)] = f
    # activations_shuffled['layer' + str(l)] = f_hat_shuffled
    # reconstruction['layer' + str(l)] = err
    # error_recon['layer' + str(l)] = rec
    # pooled['layer' + str(l)] = p
    #
    # # save model as well as weights and activations separately
    # savemat(directory_name + '/weights.mat', weights)
    # savemat(directory_name + '/activations_norm.mat', activations_norm)
    # savemat(directory_name + '/activation_raw.mat', activations_raw)

    # output helper file for concatenating activations
    helper = {'batches': n_batches, 'output_size': f_hat.shape}
    helper_file_name = directory_name + '/helper.mat'
    savemat(helper_file_name, helper)
    if args.aws == 'y':
        k.key = helper_file_name
        k.set_contents_from_filename(helper_file_name)
        os.remove(helper_file_name)

    # get data if not on AWS
    if args.aws == 'n':
        f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data)
        activations_norm = {"layer0": f_hat}

# display figures
if args.verbosity == 2:

    # if GD, plot the cost function over time
    if args.opt == 'GD':
        visualize.plotCost(cost)

    # visualize the receptive fields of the first layer
    visualize.drawplots(weights['layer0'].T, color='gray', convolution=args.convolution,
                        pad=0, examples=None, channels=args.channels)

    # visualize the distribution of lifetime and population sparseness
    for l in xrange(len(args.dimensions)):
        layer = 'layer' + str(l)
        if args.convolution == 'n':
            visualize.dispSparseHist(activations_norm[layer], l)
        elif args.convolution == 'y':
            visualize.dispSparseHist(activations_shuffled[layer].reshape(
                args.dimensions[l][0],
                data.shape[0] *
                activations_shuffled[layer].shape[2] *
                activations_shuffled[layer].shape[3]),
                layer=l)

    # visualize the distribution of activity across the "cortical sheet" and reconstruction
    if args.filename == 'patches_video.mat':
        f_hat = activations_norm['layer0'].T.reshape(video.shape[0], video.shape[1], args.dimensions[0][0])
        visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
    else:
        visualize.drawplots(activations_norm['layer0'], color='gray', convolution=args.convolution,
                            pad=1, examples=100)

    # # visualize reconstruction capabilities
    # if args.convolution == 'n':
    #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1)
    # elif args.convolution == 'y':
    #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
    #                                            color='gray', convolution=args.convolution)
    # print('Reconstructed error: %e' % reconstruction['layer0'])

    # additional visualizations for convolutional network
    if args.convolution == 'y':

        dim = activations_raw['layer0'].shape[2]

        # visualize an example of a convolved image
        visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim)
        # print activations_raw['layer0']

        # visualize max-pooled activations and LCN output
        visualize.visualize_convolved_image(
            pooled['layer0'][0, :, :, :].reshape(1,
                                                 pooled['layer0'].shape[1],
                                                 pooled['layer0'].shape[2],
                                                 pooled['layer0'].shape[3]),
            dim=dim / 2)

        # visualize an example of a LCNed convolved image after max pooling
        # temp = activations_raw['layer0']  # [0, :, :, :]
        temp = pooled['layer0']  # [0, :, :, :]
        # print temp.shape
        for i in range(temp.shape[1]):
            temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape((1, 1, dim / 2, dim / 2)),
                                                kernel_shape=5)
        # temp = scaling.LCNinput(temp, kernel_shape=5)
        visualize.visualize_convolved_image(temp, dim=dim / 2)
        # print temp

''' ================================ Test the Model ======================================= '''

# test the model if evaluating classification performance
if args.test == 'y':

    from sklearn import svm
    from sklearn.metrics import confusion_matrix

    train_labels = loadmat(file_path)['y']

    file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
    test_data = loadmat(file_path)['X']
    test_labels = loadmat(file_path)['y']

    # reshape and normalize the data
    if args.convolution == 'y':
        test_data = np.float32(test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])),
                                                 int(np.sqrt(test_data.shape[1]))))
        test_data = scaling.LCNinput(test_data, kernel_shape=5)
        test_data = test_data[0:args.examples, :, :, :]

    # get SVM test results for pixels to last layer
    train_input = None
    for layer in range(model.n_layers + 1):

        # pixel inputs
        if layer == 0:
            test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] *
                                           test_data.shape[2] * test_data.shape[3])
            train_input = data.reshape(data.shape[0], data.shape[1] *
                                       data.shape[2] * data.shape[3])

        # hidden layers
        elif layer > 0:

            # get the output of the current layer in the model given the training / test data and then reshape
            # TODO: use raw output as training and testing data?
            test_input = test[layer - 1](test_data[0:args.batch_size])
            test_input = test_input[0].reshape(test_input[0].shape[0], test_input[0].shape[1] *
                                               test_input[0].shape[2] * test_input[0].shape[3])

            train_input = activations_norm['layer' + str(layer - 1)]
            train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] *
                                              train_input.shape[2] * train_input.shape[3])

        # train linear support vector machine
        clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels[0:args.examples]))

        # get predictions from SVM and calculate accuracy
        predictions = clf.predict(test_input)
        accuracy = clf.score(test_input, test_labels[0:args.examples])

        # display results and log them
        print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
        cm = confusion_matrix(test_labels[0:args.examples], predictions)
        log_file = open(directory_name + "/log.txt", "a")
        log_file.write(
            "\nAccuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy)
        )
        log_file.close()

# visualize the confusion matrix
if args.test == 'y' and args.verbosity == 2:

    import pylab as pl

    pl.imshow(cm, interpolation='nearest')
    pl.title('Confusion Matrix for Network')
    pl.colorbar()
    pl.ylabel('True Label')
    pl.xlabel('Predicted Label')
    pl.show()
def test_s3_treestore(self):
    # Create an s3 backed treestore
    # Requires these environment variables set
    #
    #   AWS_ACCESS_KEY_ID
    #   AWS_SECRET_ACCESS_KEY
    #   S3TS_BUCKET
    #
    # NB: **this will only work if the bucket is empty

    s3c = boto.connect_s3()
    bucket = s3c.get_bucket(os.environ['S3TS_BUCKET'])

    with EmptyS3Bucket(bucket):
        fileStore = S3FileStore(bucket)
        localCache = LocalFileStore(makeEmptyDir(os.path.join(self.workdir, 'cache')))
        treestore = TreeStore.create(fileStore, localCache, TreeStoreConfig(100, True))

        # Upload it as a tree
        creationTime = datetimeFromIso('2015-01-01T00:00:00.0')
        treestore.upload('v1.0', '', creationTime, self.srcTree, CaptureUploadProgress())
        pkg = treestore.findPackage('v1.0')

        # Confirm it's in the index
        self.assertEquals(treestore.listPackages(), ['v1.0'])

        # Verify it
        treestore.verify(pkg)

        # Download it, checking we get expected progress callbacks
        cb = CaptureDownloadProgress()
        treestore.download(pkg, cb)
        self.assertEquals(sorted(cb.recorded), [30, 45, 47, 100, 100])

        # Verify it locally
        treestore.verifyLocal(pkg)

        # Install it
        destTree = os.path.join(self.workdir, 'dest-1')
        treestore.install(pkg, destTree, CaptureInstallProgress())

        # Check that the installed tree is the same as the source tree
        self.assertEquals(
            subprocess.call('diff -r -x {0} {1} {2}'.format(
                S3TS_PROPERTIES, self.srcTree, destTree), shell=True), 0)
        self.assertEquals(readInstallProperties(destTree).treeName, 'v1.0')

        # Use the compareInstall function to confirm the installed package is ok, and
        # then check that modifying the files show up in the comparison
        result = treestore.compareInstall(pkg, destTree)
        self.assertEquals(len(result.missing), 0)
        self.assertEquals(len(result.extra), 0)
        self.assertEquals(len(result.diffs), 0)

        with open(os.path.join(destTree, "code/file1.py"), "w") as f:
            f.write("x")
        with open(os.path.join(destTree, "code/file3.py"), "w") as f:
            f.write("y")
        os.unlink(os.path.join(destTree, 'assets/car-01.db'))
        result = treestore.compareInstall(pkg, destTree)
        self.assertEquals(result.missing, set(['assets/car-01.db']))
        self.assertEquals(result.extra, set(['code/file3.py']))
        self.assertEquals(result.diffs, set(['code/file1.py']))

        # Reinstall to fix directory content
        shutil.rmtree(destTree)
        treestore.install(pkg, destTree, CaptureInstallProgress())
        result = treestore.compareInstall(pkg, destTree)
        self.assertEquals(len(result.missing), 0)
        self.assertEquals(len(result.extra), 0)
        self.assertEquals(len(result.diffs), 0)

        # Now create a pre-signed version of the package
        pkg = treestore.findPackage('v1.0')
        treestore.addUrls(pkg, 3600)
        self.assertEquals(len(result.missing), 0)
        self.assertEquals(len(result.extra), 0)
        self.assertEquals(len(result.diffs), 0)

        # And download it directly via http. Create a new local cache
        # to ensure that we actually redownload each chunk
        localCache = LocalFileStore(makeEmptyDir(os.path.join(self.workdir, 'cache')))
        treestore2 = TreeStore.forHttpOnly(localCache)
        cb = CaptureDownloadProgress()
        treestore2.downloadHttp(pkg, cb)
        self.assertEquals(sorted(cb.recorded), [30, 45, 47, 100, 100])

        # Install it
        destTree2 = os.path.join(self.workdir, 'dest-2')
        treestore2.install(pkg, destTree2, CaptureInstallProgress())

        # Check that the new installed tree is the same as the source tree
        self.assertEquals(
            subprocess.call('diff -r -x {0} {1} {2}'.format(
                S3TS_PROPERTIES, self.srcTree, destTree2), shell=True), 0)

        # Rename the tree, and check that installing that is the same
        treestore.rename('v1.0', 'v1.0x')
        pkg = treestore.findPackage('v1.0x')
        treestore.download(pkg, CaptureDownloadProgress())
        destTree = os.path.join(self.workdir, 'dest-3')
        treestore.install(pkg, destTree, CaptureInstallProgress())
        self.assertEquals(
            subprocess.call('diff -r -x {0} {1} {2}'.format(
                S3TS_PROPERTIES, self.srcTree, destTree), shell=True), 0)

        # Remove the tree
        treestore.remove('v1.0x')
def test_bucket_name_with_dot():
    conn = boto.connect_s3()
    bucket = conn.create_bucket('firstname.lastname')

    k = Key(bucket, 'somekey')
    k.set_contents_from_string('somedata')
# how to capture variables passed from the command line in Python?

import boto
import boto.s3.connection

access_key = 'your_access_key'
secret_key = 'your_secret_key'

conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    host='objects.dreamhost.com',
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)

newbucket = conn.create_bucket('the-ipanemas')

for bucket in conn.get_all_buckets():
    print "{name}\t{created}".format(
        name=bucket.name,
        created=bucket.creation_date,
    )
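A minimal sketch answering the question in the comment above: capture the credentials from the command line with argparse instead of hard-coding them. The flag names and the objects.dreamhost.com default are assumptions carried over from the snippet, not part of the original.

# Hypothetical argparse variant of the snippet above; flag names are illustrative.
import argparse

import boto
import boto.s3.connection

parser = argparse.ArgumentParser(description='List buckets on an S3-compatible endpoint')
parser.add_argument('--access-key', required=True)
parser.add_argument('--secret-key', required=True)
parser.add_argument('--host', default='objects.dreamhost.com')
args = parser.parse_args()

conn = boto.connect_s3(
    aws_access_key_id=args.access_key,
    aws_secret_access_key=args.secret_key,
    host=args.host,
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)

for bucket in conn.get_all_buckets():
    print "{name}\t{created}".format(name=bucket.name, created=bucket.creation_date)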
#! /usr/bin/env python

import os, sys, boto
from boto.s3.key import Key

if len(sys.argv) != 3:
    print "Usage : python uploader.py <bucket name> <file absolute path>"
    print "Usage : Enter 2 arguments in command line!!"
    exit()

# kwangje.park
AWS_ACCESS_KEY = "key here"
AWS_SECRET = "secret here"

s3Connection = boto.connect_s3(AWS_ACCESS_KEY, AWS_SECRET)

try:
    bucket = s3Connection.get_bucket(sys.argv[1])
    uploader = Key(bucket)
except:
    print "ERROR >> Failed to connect to bucket !!!"
    exit()

uploadFile = sys.argv[2]

try:
    os.stat(uploadFile)
except:
    print "ERROR >> File does not exist: " + uploadFile
    exit()
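The uploader stops after checking that the file exists; a minimal sketch of the missing upload step, assuming the S3 key name should simply be the file's base name:

# Hypothetical continuation of the uploader above; the key-naming choice is an assumption.
uploader.key = os.path.basename(uploadFile)
uploader.set_contents_from_filename(uploadFile)
print "Uploaded " + uploadFile + " to bucket " + sys.argv[1]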
def deploy_static(app_name, env_name, domain, force):
    app = App(env_name, app_name)
    bucket_name = domain or '{}-{}'.format(
        config.get('system_name', uuid.uuid1().hex),
        app.repo.name)

    app.repo.fetch()
    version = app.repo.head_commit_id()

    s3 = boto.connect_s3()
    b = s3.lookup(bucket_name)
    if b is not None:
        version_key = b.get_key('__VERSION__')
        if version_key is not None:
            current_version = version_key.get_metadata('git-version')
            if version == current_version:
                if force:
                    print '-----> Version {} already deployed, but re-deploying anyway'.format(version)
                else:
                    print '-----> Version {} already deployed!'.format(version)
                    return

    with lcd(app.repo.path):
        build_cmd = app.config.get('build_script')
        if build_cmd:
            print '-----> Building'
            local(build_cmd)

    if b is None:
        print '-----> Creating bucket {}'.format(bucket_name)
        b = s3.create_bucket(bucket_name)

    # TODO: this policy allows all users read access to all objects.
    # Need to find a way to limit access to __VERSION__ to only authenticated
    # users.
    public_access_policy = json.dumps({
        "Version": "2012-10-17",
        "Statement": [{
            "Sid": "PublicReadForGetBucketObjects",
            "Effect": "Allow",
            "Principal": "*",
            "Action": ["s3:GetObject"],
            "Resource": ["arn:aws:s3:::{}/*".format(bucket_name)]
        }]
    })
    b.set_policy(public_access_policy)
    # b.configure_versioning(versioning=False)
    b.configure_website(suffix="index.html", error_key="error.html")

    def map_key_to_obj(m, obj):
        if obj.key != '__VERSION__':
            m[obj.key] = obj
        return m

    existing_keys = reduce(map_key_to_obj, b.get_all_keys(), {})

    root = normpath(join(app.repo.path, app.config.get('root_dir', '')))

    app_redirects = app.config.get('redirects', {})
    for key_name in app_redirects.keys():
        existing_keys.pop(key_name, None)

    print '-----> Uploading {} to {} bucket'.format(root, bucket_name)
    new_keys = []
    updated_keys = []
    for dirname, dirnames, filenames in walk(root):
        reldirname = relpath(dirname, root)
        reldirname = '' if reldirname == '.' else reldirname
        if os.path.commonprefix(['.git', reldirname]) == '.git':
            continue
        for filename in filenames:
            full_filename = join(reldirname, filename)
            if full_filename == '.s3':
                continue
            new_or_update = '        '
            if existing_keys.has_key(full_filename):
                new_or_update = '[UPDATE]'
                updated_keys.append(full_filename)
                key = existing_keys.pop(full_filename)
            else:
                new_or_update = '[NEW]   '
                new_keys.append(full_filename)
                key = b.new_key(full_filename)
            print '       {} Uploading {}'.format(new_or_update, full_filename)
            key.set_contents_from_filename(join(dirname, filename))

    if len(existing_keys) > 0:
        print '-----> WARNING: the following files are still present but no'
        print '       longer part of the website:'
        for k, v in existing_keys.iteritems():
            print '       {}'.format(k)

    print '-----> Tagging bucket with git version {}'.format(version)
    version_key = b.get_key('__VERSION__')
    if version_key:
        version_key.delete()
    version_key = b.new_key('__VERSION__')
    version_key.set_metadata('git-version', version)
    version_key.set_contents_from_string('')

    print '-----> Setting up redirects'
    app_redirects = app.config.get('redirects', {})
    if len(app_redirects) == 0:
        print '       No redirects.'
    else:
        def get_or_new_key(bucket, name):
            key = bucket.get_key(name)
            if key is not None:
                key.delete()
            return bucket.new_key(name)

        elb = boto.connect_elb()
        pybars_compiler = pybars.Compiler()
        for key_name, redirect_source in app_redirects.iteritems():
            redirect_template = pybars_compiler.compile(redirect_source)
            app_redirects[key_name] = redirect_template
        data = {
            'webui_dns': elb.get_all_load_balancers(
                load_balancer_names=['{}-web-ui'.format(env_name)])[0].dns_name
        }
        for key_name, redirect_template in app_redirects.iteritems():
            k = get_or_new_key(b, key_name)
            redirect = unicode(redirect_template(data))
            print '       Redirect {} to {}'.format(key_name, redirect)
            k.set_redirect(redirect)

    print '=====> Deployed to {}!'.format(b.get_website_endpoint())

    if domain is not None:
        # TODO: support redirection from www.<domain>
        # b_www = 'www.{}'.format(bucket_name)

        ec2 = boto.connect_ec2()
        region_name = first([
            z.region.name for z in ec2.get_all_zones()
            if z.name == config['availability_zone']
        ])
        s3_website_region = s3_website_regions[region_name]

        route53 = boto.connect_route53()
        zone_name = "{}.".format(get_tld("http://{}".format(domain)))
        zone = route53.get_zone(zone_name)
        if zone is None:
            raise Exception("Cannot find zone {}".format(zone_name))
        full_domain = "{}.".format(domain)
        a_record = zone.get_a(full_domain)
        if not a_record:
            print '-----> Creating ALIAS for {} to S3'.format(full_domain)
            changes = ResourceRecordSets(route53, zone.id)
            change_a = changes.add_change('CREATE', full_domain, 'A')
            change_a.set_alias(alias_hosted_zone_id=s3_website_region[1],
                               alias_dns_name=s3_website_region[0])
            # change_cname = records.add_change('CREATE', 'www.' + full_domain, 'CNAME')
            # change_cname.add_value(b_www.get_website_endpoint())
            changes.commit()
        else:
            print '-----> ALIAS for {} to S3 already exists'.format(full_domain)
            print '       {}'.format(a_record)
            if a_record.alias_dns_name != s3_website_region[0]:
                print '       WARNING: Alias DNS name is {}, but should be {}'.format(
                    a_record.alias_dns_name, s3_website_region[0])
            if a_record.alias_hosted_zone_id != s3_website_region[1]:
                print '       WARNING: Alias hosted zone ID is {}, but should be {}'.format(
                    a_record.alias_hosted_zone_id, s3_website_region[1])
            if a_record.name != full_domain:
                print '       WARNING: Domain is {}, but should be {}'.format(
                    a_record.name, full_domain)
            if a_record.type != 'A':
                print '       WARNING: Record type is {}, but should be {}'.format(
                    a_record.type, 'A')

    print '=====> DONE!'
def test_bucket_with_dash():
    conn = boto.connect_s3('the_key', 'the_secret')
    conn.get_bucket.when.called_with('mybucket-test').should.throw(S3ResponseError)
def connect_method(self, *args, **kwargs):
    return boto.connect_s3(*args, **kwargs)
start_time = time.time()

assert opt.mysql_query_file, 'Input query file (-q,--mysql_query_file) is not set.'
assert os.path.isfile(opt.mysql_query_file), 'Query file "%s"\ndoes not exist.' % opt.mysql_query_file
q_file = os.path.splitext(os.path.basename(opt.mysql_query_file))
assert opt.red_to_table, 'Target Redshift table is not set.'
assert opt.s3_bucket_name, 'Target S3 bucket name (-b,--s3_bucket_name) is not set.'
assert RepresentsInt(opt.mysql_lame_duck), '[-l] --mysql_lame_duck is not of type "integer".'

if not opt.s3_key_name:
    opt.s3_key_name = q_file[0]

conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

try:
    bucket = conn.get_bucket(opt.s3_bucket_name)
    print('Uploading results of "%s" to existing bucket "%s"' % (''.join(q_file), opt.s3_bucket_name))
except S3ResponseError as err:
    if str(err).strip().endswith('404 Not Found'):
        print('Creating new bucket "%s" in location "%s"' % (opt.s3_bucket_name, opt.s3_location))
        try:
            conn.create_bucket(opt.s3_bucket_name, location=opt.s3_location)
            print('Uploading results of "%s" to new bucket "%s" in region "%s"'
                  % (''.join(q_file), opt.s3_bucket_name, opt.s3_location))
def get_s3_connection(self):
    if not self.s3:
        self.s3 = boto.connect_s3(self.aws_access_key_id, self.aws_secret_access_key)
    return self.s3
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import boto
import sys
import threading
import multiprocessing
import Queue

c = boto.connect_s3()


def uploadThread(mpUpload, fileQueue):
    while True:
        try:
            ix, fileName = fileQueue.get(False)
            with open(fileName, 'rb') as fp:
                mpUpload.upload_part_from_file(fp, ix)
            print 'finished upload of %s' % fileName
        except Queue.Empty:
            return


if __name__ == "__main__":
    if len(sys.argv) < 4:
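        # --- The original snippet is truncated at this point. What follows is a
        # --- hypothetical sketch of the missing driver, assuming a call signature of
        # --- <bucket> <keyname> <part-file> [<part-file> ...]; the usage text and
        # --- thread count are assumptions, not part of the original.
        print 'usage: %s <bucket> <keyname> <part-file> [<part-file> ...]' % sys.argv[0]
        sys.exit(1)

    bucketName, keyName = sys.argv[1], sys.argv[2]
    partFiles = sys.argv[3:]

    bucket = c.get_bucket(bucketName)
    mpUpload = bucket.initiate_multipart_upload(keyName)

    # S3 multipart upload part numbers start at 1
    fileQueue = Queue.Queue()
    for ix, fileName in enumerate(partFiles, start=1):
        fileQueue.put((ix, fileName))

    # one uploader thread per CPU; purely a heuristic
    threads = [threading.Thread(target=uploadThread, args=(mpUpload, fileQueue))
               for _ in range(multiprocessing.cpu_count())]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    mpUpload.complete_upload()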
def test_missing_key():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket("foobar")
    bucket.get_key("the-key").should.equal(None)