def main():
    new_hash = hashlib.md5(str(time.time())).hexdigest()[:8]
    parser = argparse.ArgumentParser()
    parser.add_argument("action", choices=ALLOWED_ACTIONS, action="store",
                        help="Action to take against the stack(s)")
    parser.add_argument("-l", "--location", nargs='*', action="store",
                        dest="locations", type=ip_address_type,
                        default=["0.0.0.0"],
                        help="""If building, provide the IP Address(es) from
                        which ssh is allowed.
                        Example: './go.py build -l xx.xx.xx.xx yy.yy.yy.yy'""")
    parser.add_argument('--region', action="store", dest="region",
                        default=DEFAULT_REGION)
    parser.add_argument('--hash', action="store", dest="hash_id",
                        default=new_hash,
                        help="""Define the hash to use for multiple
                        deployments. If left blank, the hash will be
                        generated.""")
    parser.add_argument('--full', action='store_true',
                        help="Always build all components. (VPC, RDS, etc.)")
    args = parser.parse_args()
    full = args.full

    connections = dict()
    connections['cfn'] = cfn_connect(args.region)
    if args.action == "info":
        info(connections)
        sys.exit(0)
    connections['codedeploy'] = codedeploy_connect(args.region)
    connections['ec2'] = ec2_connect(args.region)
    connections['iam'] = iam_connect(args.region)
    connections['main_s3'] = s3_connect(MAIN_S3_BUCKET_REGION)
    connections['s3'] = s3_connect(args.region)
    if args.action == "test":
        # Test pieces here
        sys.exit(0)
    if args.action == "build":
        if not args.locations:
            print "Please provide at least one IP Address."
            parser.print_help()
            sys.exit(1)
        build(connections, args.region, args.locations, args.hash_id, full)
    elif args.action == "destroy":
        destroy(connections, args.region)

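# `ip_address_type` is used as the argparse `type=` validator above but is
# not defined in this snippet. A minimal sketch of what it could look like
# (this exact implementation is an assumption, not the original):
import argparse
import socket

def ip_address_type(value):
    """Validate that an argument parses as a dotted-quad IPv4 address."""
    try:
        socket.inet_aton(value)
    except socket.error:
        raise argparse.ArgumentTypeError("%s is not a valid IP address"
                                         % value)
    return value
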
def __init__(self, input_queue, work_folder, bucket, prefix, region,
             aws_cred):
    self.input_queue_name = input_queue
    self.work_folder = work_folder
    self.data_folder = os.path.join(work_folder, 'data')
    self.bucket_name = bucket
    self.prefix = prefix
    self.region = region
    self.aws_cred = aws_cred
    self.analysis_bucket_name = "jonasfj-telemetry-analysis"
    if self.prefix != '' and not self.prefix.endswith('/'):
        self.prefix += '/'
    # Clear the work folder
    shutil.rmtree(self.work_folder, ignore_errors=True)
    self.s3 = s3_connect(self.region, **self.aws_cred)
    self.bucket = self.s3.get_bucket(self.bucket_name, validate=False)
    self.analysis_bucket = self.s3.get_bucket(self.analysis_bucket_name,
                                              validate=False)
    mkdirp(self.data_folder)
    self.cache_folder = os.path.join(self.work_folder, "cache")
    mkdirp(self.cache_folder)
    self.files_missing_path = os.path.join(self.work_folder,
                                           'FILES_MISSING')
    self.files_processed_path = os.path.join(self.work_folder,
                                             'FILES_PROCESSED')
    self.get_file('FILES_PROCESSED', self.files_processed_path)
    self.get_file('FILES_MISSING', self.files_missing_path)

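# `mkdirp` is called above but not defined in this snippet. The usual
# implementation is equivalent to `mkdir -p` (assumed, not the original):
import errno
import os

def mkdirp(path):
    """Create a folder, including parents; ignore 'already exists' errors."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
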
def prepare_bootstrap(manifest, build_server):
    if manifest.volume['backing'] == 's3':
        credentials = {
            'access-key': build_server.build_settings['ec2-credentials']['access-key'],
            'secret-key': build_server.build_settings['ec2-credentials']['secret-key'],
        }
        from boto.s3 import connect_to_region as s3_connect
        s3_connection = s3_connect(
            manifest.image['region'],
            aws_access_key_id=credentials['access-key'],
            aws_secret_access_key=credentials['secret-key'])
        log.debug('Creating S3 bucket')
        bucket = s3_connection.create_bucket(manifest.image['bucket'],
                                             location=manifest.image['region'])
        try:
            yield
        finally:
            log.debug('Deleting S3 bucket')
            for item in bucket.list():
                bucket.delete_key(item.key)
            s3_connection.delete_bucket(manifest.image['bucket'])
    else:
        yield

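# prepare_bootstrap yields mid-function, so the original module presumably
# wraps it with contextlib.contextmanager (the decorator is not visible in
# this snippet). Assuming that, a caller would use it as below;
# `run_bootstrap` is a hypothetical stand-in for the actual bootstrap work:
with prepare_bootstrap(manifest, build_server):
    run_bootstrap(manifest)  # the temporary S3 bucket exists only in here
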
def main():
    if sys.version_info[:3] > (2, 7, 8):
        print "There is currently an SSL issue with Python 2.7.9 and newer."
        print "Please set up a virtualenv with Python 2.7.8 or less to proceed."
        sys.exit(1)
    new_hash = hashlib.md5(str(time.time())).hexdigest()[:8]
    parser = argparse.ArgumentParser()
    parser.add_argument("action", choices=ALLOWED_ACTIONS, action="store",
                        help="Action to take against the stack(s)")
    parser.add_argument("-l", "--location", nargs='*', action="store",
                        dest="locations", type=ip_address_type,
                        default=["0.0.0.0"],
                        help="""If building, provide the IP Address(es) from
                        which ssh is allowed.
                        Example: './go.py build -l xx.xx.xx.xx yy.yy.yy.yy'""")
    parser.add_argument('--region', action="store", dest="region",
                        default=DEFAULT_REGION)
    parser.add_argument('--hash', action="store", dest="hash_id",
                        default=new_hash,
                        help="""Define the hash to use for multiple
                        deployments. If left blank, the hash will be
                        generated.""")
    parser.add_argument('-u', '--user', action="store", dest="jenkins_user",
                        default=JENKINS_USER, help="Username for Jenkins")
    parser.add_argument('-e', '--email', action="store", dest="jenkins_email",
                        default=JENKINS_EMAIL, help="Email for Jenkins")
    parser.add_argument('-p', '--password', action="store_true",
                        dest="password_prompt",
                        help="Prompt for Jenkins Password")
    parser.add_argument('--full', action='store_true',
                        help="Always build all components. (VPC, RDS, etc.)")
    parser.add_argument('--warm', action='store_true',
                        help="Only build VPC, SG, and RDS")
    args = parser.parse_args()
    if args.password_prompt:
        print "WARNING: Password will be passed to CFN in plain text."
        args.jenkins_password = getpass.getpass()
    else:
        args.jenkins_password = JENKINS_PASSWORD

    connections = dict()
    connections['cfn'] = cfn_connect(args.region)
    if args.action == "info":
        info(connections)
        sys.exit(0)
    connections['codedeploy'] = codedeploy_connect(args.region)
    connections['ec2'] = ec2_connect(args.region)
    connections['iam'] = iam_connect(args.region)
    connections['s3'] = s3_connect(args.region)
    if args.action == "test":
        # Test pieces here
        sys.exit(0)
    if args.action == "build":
        if not args.locations:
            print "Please provide at least one IP Address."
            parser.print_help()
            sys.exit(1)
        build(connections, args)
    elif args.action == "destroy":
        destroy(connections, args)

def s3get(input_bucket, prefix, output_folder, decompress, compress, region,
          aws_cred, nb_workers=cpu_count() * 4):
    # Clear output folder if necessary
    shutil.rmtree(output_folder, ignore_errors=True)
    # Sanitize prefix, we always work on folders here
    if prefix != "" and not prefix.endswith('/'):
        prefix += '/'
    # Create queue of work to do
    queue = Queue()
    # Start workers
    downloaders = []
    for i in xrange(0, nb_workers):
        downloader = Downloader(queue, None, input_bucket, decompress,
                                compress, region, aws_cred)
        downloaders.append(downloader)
        downloader.start()
    s3 = s3_connect(region, **aws_cred)
    bucket = s3.get_bucket(input_bucket, validate=False)
    last_key = ''
    retries = 0
    while True:
        try:
            for k in bucket.list(prefix=prefix, marker=last_key):
                last_key = k.key
                source_prefix = k.key
                rel_prefix = source_prefix[len(prefix):]
                target_path = os.path.join(output_folder,
                                           *rel_prefix.split('/'))
                queue.put((source_prefix, target_path))
            break
        except:
            print >> sys.stderr, "List operation of %s at %s" % (prefix,
                                                                 last_key)
            print_exc(file=sys.stderr)
            retries += 1
            if retries >= NB_RETRIES:
                return False
            else:
                time.sleep(4 * ((retries - 1) ** 2))
    # Add end of queue marker for each worker
    for i in xrange(0, nb_workers):
        queue.put(None)
    # Join workers
    for downloader in downloaders:
        downloader.join()
    # If one of the workers failed, we've failed
    for downloader in downloaders:
        if downloader.exitcode != 0:
            return False
    return True

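# Hypothetical invocation of s3get (bucket, prefix, and paths are
# illustrative): download everything under 'some/prefix/' with 8 workers,
# gunzipping each object and writing it uncompressed to disk. An empty
# aws_cred dict falls through to boto's normal credential resolution.
ok = s3get('example-input-bucket', 'some/prefix', '/tmp/output',
           decompress=True, compress=False, region='us-east-1',
           aws_cred={}, nb_workers=8)
if not ok:
    sys.exit(1)
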
def collect_garbage(bucket, prefix, cache_folder, region, aws_cred,
                    nb_workers):
    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'
    # Connect to S3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate=False)
    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)
    print "### Collecting Garbage on S3"
    # List prefixes from bucket; anything not referenced by versions.json
    # is obsolete
    obsolete = []
    current = versions.values()
    for p in s3_bucket.list(prefix=prefix, delimiter='/'):
        if isinstance(p, Prefix):
            if p.name[len(prefix):-1] not in current:
                obsolete.append(p.name)
    # For each obsolete prefix, list objects and delete them in batches
    for old in obsolete:
        deleted = 0
        keys = []
        for k in s3_bucket.list(prefix=old):
            if len(keys) >= 1000:
                try:
                    s3_bucket.delete_keys(keys)
                    deleted += len(keys)
                except:
                    print >> sys.stderr, (
                        "Failed to delete %i objects from %s" %
                        (len(keys), old))
                keys = []
            keys.append(k)
        if len(keys) > 0:
            try:
                s3_bucket.delete_keys(keys)
                deleted += len(keys)
            except:
                print >> sys.stderr, ("Failed to delete %i objects from %s" %
                                      (len(keys), old))
        print " - Deleted %i objects from %s" % (deleted, old)

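# `s3get_json` is called above as s3get_json(bucket, key, decompress,
# fallback) but not defined in this snippet. A plausible sketch, matching
# how the worker processes handle gzip (the implementation itself is an
# assumption):
from StringIO import StringIO
import gzip
import json

def s3get_json(bucket, key_name, decompress, fallback):
    """Fetch a JSON object from S3, gunzipping if requested; return
    `fallback` if the key does not exist."""
    key = bucket.get_key(key_name)
    if key is None:
        return fallback
    data = key.get_contents_as_string()
    if decompress:
        fobj = StringIO(data)
        with gzip.GzipFile(mode='rb', fileobj=fobj) as zobj:
            data = zobj.read()
    return json.loads(data)
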
def run(self):
    if self.decompress:
        def read(path):
            return gzip.open(path, 'r')
    else:
        def read(path):
            return open(path, 'r')
    s3 = s3_connect(self.region, **self.aws_cred)
    bucket = s3.get_bucket(self.target_bucket, validate=False)
    while True:
        msg = self.queue.get()
        if msg is None:
            break
        source_file, target_prefix = msg
        retries = 0
        success = False
        while retries < NB_RETRIES:
            try:
                retries += 1
                with read(source_file) as f:
                    data = f.read()
                headers = {'Content-Type': 'application/json'}
                if self.compress:
                    fobj = StringIO()
                    with gzip.GzipFile(mode='wb', fileobj=fobj) as zobj:
                        zobj.write(data)
                    data = fobj.getvalue()
                    fobj.close()
                    headers['Content-Encoding'] = 'gzip'
                # Put to S3
                k = Key(bucket)
                k.key = target_prefix
                k.set_contents_from_string(data, headers=headers)
                success = True
                break
            except:
                print >> sys.stderr, "Failed to upload %s to %s" % msg
                print_exc(file=sys.stderr)
                time.sleep((retries - 1) ** 2)
        # Only exit non-zero if every attempt failed; testing the retry
        # counter here would also treat a success on the final attempt as
        # a failure
        if not success:
            sys.exit(1)
    s3.close()

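# run() above belongs to an upload worker: it pulls (source_file,
# target_prefix) pairs from a queue, exits its loop on a None sentinel, and
# reports failure through its exit code, which matches a
# multiprocessing.Process subclass. A sketch of driving a pool of such
# workers, mirroring the downloader pool in s3get; the Uploader constructor
# arguments below are a guess from the attributes run() uses, not the
# original signature:
from multiprocessing import Queue

queue = Queue()
uploaders = [Uploader(queue, 'example-bucket', False, True, 'us-east-1', {})
             for _ in xrange(4)]
for u in uploaders:
    u.start()
queue.put(('/tmp/data/file.json', 'some/prefix/file.json'))
for u in uploaders:
    queue.put(None)  # one sentinel per worker so each run() loop terminates
for u in uploaders:
    u.join()
ok = all(u.exitcode == 0 for u in uploaders)
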
def s3get(input_bucket, prefix, output_folder, decompress, compress, region,
          aws_cred, nb_workers=cpu_count() * 4):
    # Clear output folder if necessary
    shutil.rmtree(output_folder, ignore_errors=True)
    # Sanitize prefix, we always work on folders here
    if prefix != "" and not prefix.endswith('/'):
        prefix += '/'
    # Create queue of work to do
    queue = Queue()
    # Start workers
    downloaders = []
    for i in xrange(0, nb_workers):
        downloader = Downloader(queue, None, input_bucket, decompress,
                                compress, region, aws_cred)
        downloaders.append(downloader)
        downloader.start()
    s3 = s3_connect(region, **aws_cred)
    bucket = s3.get_bucket(input_bucket, validate=False)
    for k in bucket.list(prefix=prefix):
        source_prefix = k.key
        rel_prefix = source_prefix[len(prefix):]
        target_path = os.path.join(output_folder, *rel_prefix.split('/'))
        queue.put((source_prefix, target_path))
    # Add end of queue marker for each worker
    for i in xrange(0, nb_workers):
        queue.put(None)
    # Join workers
    for downloader in downloaders:
        downloader.join()
    # If one of the workers failed, we've failed
    for downloader in downloaders:
        if downloader.exitcode != 0:
            return False
    return True

def run(self):
    if self.compress:
        def write(path):
            return gzip.open(path, 'w')
    else:
        def write(path):
            return open(path, 'w')
    s3 = s3_connect(self.region, **self.aws_cred)
    bucket = s3.get_bucket(self.input_bucket, validate=False)
    while True:
        msg = self.queue.get()
        if msg is None:
            break
        source_prefix, target_path = msg
        retries = 0
        success = False
        while retries < NB_RETRIES:
            try:
                retries += 1
                k = Key(bucket)
                k.key = source_prefix
                data = k.get_contents_as_string()
                if self.decompress:
                    fobj = StringIO(data)
                    with gzip.GzipFile(mode='rb', fileobj=fobj) as zobj:
                        data = zobj.read()
                    fobj.close()
                # Create target folder
                mkdirp(os.path.dirname(target_path))
                with write(target_path) as f:
                    f.write(data)
                success = True
                break
            except:
                print >> sys.stderr, "Failed to download %s to %s" % msg
                print_exc(file=sys.stderr)
                time.sleep(4 * ((retries - 1) ** 2))
        # Only exit non-zero if every attempt failed; testing the retry
        # counter here would also treat a success on the final attempt as
        # a failure
        if not success:
            sys.exit(1)
        if self.output_queue is not None:
            self.output_queue.put(target_path)
    s3.close()

def updateresults(input_folder, work_folder, bucket, prefix, cache_folder,
                  region, aws_cred, nb_workers):
    # Find input files
    input_files = []
    for path, folders, files in os.walk(input_folder):
        for f in files:
            # Get channel/version relative to the input folder
            cv = os.path.relpath(os.path.join(path, f), input_folder)
            input_files.append(cv)
    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'
    # Connect to S3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate=False)
    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)
    # Update results in bucket
    for channel_version in input_files:
        print "### Updating: " + channel_version
        # Download all files for channel_version to disk
        rmtree(work_folder, ignore_errors=True)
        data_folder = os.path.join(work_folder, channel_version)
        mkdirp(data_folder)
        snapshot = versions.get(channel_version, None)
        if snapshot:
            fetched = False
            while not fetched:
                fetched = s3get(bucket, prefix + snapshot, data_folder, True,
                                False, region, aws_cred)
                if not fetched:
                    print >> sys.stderr, "Failed to download %s" % snapshot
                    sleep(5 * 60)
            print " - downloaded " + snapshot
        # Create ChannelVersionManager
        channel, version = channel_version.split('/')
        manager = ChannelVersionManager(work_folder, channel, version,
                                        False, False, False)
        # Feed it with rows from input_file
        rows = 0
        with open(os.path.join(input_folder, channel_version), 'r') as f:
            for line in f:
                try:
                    filePath, blob = line.split('\t')
                    channel_, version_, measure, byDateType = \
                        filePath.split('/')
                    blob = json.loads(blob)
                    if channel_ != channel or version_ != version:
                        print >> sys.stderr, (
                            "Error: Found %s/%s within a %s file!" %
                            (channel_, version_, channel_version))
                        continue
                    manager.merge_in_blob(measure, byDateType, blob)
                    rows += 1
                except:
                    print >> sys.stderr, "Error while handling row:"
                    print_exc(file=sys.stderr)
        manager.flush()
        print " - merged %i rows" % rows
        # Upload updated files to S3
        date = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        cv_prefix = "%s-%s-%s" % (date, version, channel)
        uploaded = False
        while not uploaded:
            uploaded = s3put(data_folder, bucket, prefix + cv_prefix, False,
                             True, region, aws_cred, nb_workers)
            if not uploaded:
                print >> sys.stderr, "Failed to upload '%s'" % cv_prefix
                sleep(5 * 60)
        print " - uploaded to " + cv_prefix
        # Store changes in versions
        versions[channel_version] = cv_prefix
    # Upload new versions.json and write to cache
    s3put_json(s3_bucket, prefix + 'versions.json', True, versions)
    with open(versions_json, 'w') as f:
        json.dump(versions, f)
    print "### New snapshot uploaded"
    try:
        # Garbage collect old channel/version folders on S3
        collect_garbage(bucket, prefix, cache_folder, region, aws_cred,
                        nb_workers)
    except:
        print >> sys.stderr, "Failed to collect garbage on S3"

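# `s3put_json` mirrors s3get_json and is called above as
# s3put_json(bucket, key, compress, value). A plausible sketch (assumed,
# not the original): serialize, gzip when requested, upload via a boto Key.
from StringIO import StringIO
from boto.s3.key import Key
import gzip
import json

def s3put_json(bucket, key_name, compress, value):
    """Store `value` as JSON in S3, gzipping the payload if requested."""
    data = json.dumps(value)
    headers = {'Content-Type': 'application/json'}
    if compress:
        fobj = StringIO()
        with gzip.GzipFile(mode='wb', fileobj=fobj) as zobj:
            zobj.write(data)
        data = fobj.getvalue()
        headers['Content-Encoding'] = 'gzip'
    k = Key(bucket)
    k.key = key_name
    k.set_contents_from_string(data, headers=headers)
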
from uuid import uuid4
from sqlalchemy import create_engine, MetaData
from sqlalchemy.sql import select, func
from subprocess import check_output, CalledProcessError
from tempfile import mkstemp
import crontab
import json

# Create flask app
app = Flask(__name__)
app.config.from_object('config')

# Connect to AWS
ec2 = ec2_connect(app.config['AWS_REGION'])
ses = ses_connect('us-east-1')  # only supported region!
s3 = s3_connect(app.config['AWS_REGION'])
bucket = s3.get_bucket(app.config['TEMPORARY_BUCKET'], validate=False)
code_bucket = s3.get_bucket(app.config['CODE_BUCKET'], validate=False)

# Create login manager
login_manager = LoginManager()
login_manager.anonymous_user = AnonymousUser

# Initialize browser id login
browser_id = BrowserID()

# Cron-related constants:
CRON_IDX_MIN = 0
CRON_IDX_HOUR = 1
CRON_IDX_DOM = 2
CRON_IDX_MON = 3

from flask.ext.browserid import BrowserID
from user import User, AnonymousUser
from boto.ec2 import connect_to_region as ec2_connect
from boto.ses import connect_to_region as ses_connect
from boto.s3 import connect_to_region as s3_connect
from urlparse import urljoin
from uuid import uuid4

# Create flask app
app = Flask(__name__)
app.config.from_object("config")

# Connect to AWS
ec2 = ec2_connect(app.config["AWS_REGION"])
ses = ses_connect("us-east-1")  # only supported region!
s3 = s3_connect(app.config["AWS_REGION"])
bucket = s3.get_bucket(app.config["TEMPORARY_BUCKET"], validate=False)

# Create login manager
login_manager = LoginManager()
login_manager.anonymous_user = AnonymousUser

# Initialize browser id login
browser_id = BrowserID()


def abs_url_for(rule, **options):
    return urljoin(request.url_root, url_for(rule, **options))


@browser_id.user_loader
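# The decorated loader is truncated in this snippet. A plausible completion,
# assuming the flask-browserid convention that the callback receives the
# BrowserID verification response (this body is an assumption, not the
# original):
def get_user(response):
    """Create a User from a successful BrowserID verification."""
    if response.get('status') == 'okay':
        return User(response['email'])
    return None
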
from subprocess import check_output, CalledProcessError
from tempfile import mkstemp
import crontab
import json
import re
import os.path

# Create flask app
app = Flask(__name__)
app.config.from_object('config')

# Connect to AWS
emr = emr_connect(app.config['AWS_REGION'])
ec2 = ec2_connect(app.config['AWS_REGION'])
ses = ses_connect(app.config['AWS_REGION'])
s3 = s3_connect(app.config['AWS_REGION'])
bucket = s3.get_bucket(app.config['TEMPORARY_BUCKET'], validate=False)
code_bucket = s3.get_bucket(app.config['CODE_BUCKET'], validate=False)

# Create login manager
login_manager = LoginManager()
login_manager.anonymous_user = AnonymousUser

# Initialize browser id login
browser_id = BrowserID()

# Cron-related constants:
CRON_IDX_MIN = 0
CRON_IDX_HOUR = 1
CRON_IDX_DOM = 2
CRON_IDX_MON = 3

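# The CRON_IDX_* constants above suggest schedules are split into a list of
# cron fields and indexed. A hypothetical helper illustrating that
# convention; the name `parse_schedule` and the day-of-week index are
# assumptions, not part of the original module:
CRON_IDX_DOW = 4  # assumed to follow CRON_IDX_MON

def parse_schedule(schedule):
    """Split a cron schedule such as '0 5 * * 1' into named fields."""
    fields = schedule.split()
    return {
        'minute': fields[CRON_IDX_MIN],
        'hour': fields[CRON_IDX_HOUR],
        'day_of_month': fields[CRON_IDX_DOM],
        'month': fields[CRON_IDX_MON],
        'day_of_week': fields[CRON_IDX_DOW],
    }
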
def setup_s3(self):
    self.s3 = s3_connect(self.s3_region)
    self.bucket = self.s3.get_bucket(self.s3_bucket)

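# setup_s3 reads self.s3_region and self.s3_bucket, so its host class must
# assign those first. A minimal hypothetical host class (all names here are
# assumptions) showing the expected wiring:
class S3Job(object):
    def __init__(self, s3_region, s3_bucket):
        self.s3_region = s3_region
        self.s3_bucket = s3_bucket

    def setup_s3(self):
        self.s3 = s3_connect(self.s3_region)
        self.bucket = self.s3.get_bucket(self.s3_bucket)

job = S3Job('us-east-1', 'example-bucket')
job.setup_s3()  # note: get_bucket validates here, unlike validate=False above
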