Example 1
def main():
    new_hash = hashlib.md5(str(time.time())).hexdigest()[:8]
    parser = argparse.ArgumentParser()
    parser.add_argument("action",
                        choices=ALLOWED_ACTIONS,
                        action="store",
                        help="Action to take against the stack(s)")
    parser.add_argument("-l",
                        "--location",
                        nargs='*',
                        action="store",
                        dest="locations",
                        help="""If building, provide the
                        IP Address(es) from which ssh is allowed.\n
                        Example: './go.py build -l xx.xx.xx.xx yy.yy.yy.yy'""",
                        type=ip_address_type,
                        default=["0.0.0.0"])
    parser.add_argument('--region',
                        action="store",
                        dest="region",
                        default=DEFAULT_REGION)
    parser.add_argument('--hash',
                        action="store",
                        dest="hash_id",
                        help="""Define the hash to use for multiple
                        deployments.  If left blank, the hash will be
                        generated.""",
                        default=new_hash)
    parser.add_argument('--full',
                        action='store_true',
                        help="Always build all components. (VPC, RDS, etc.)")
    args = parser.parse_args()
    full = args.full
    connections = dict()
    connections['cfn'] = cfn_connect(args.region)
    if args.action == "info":
        info(connections)
        sys.exit(0)
    connections['codedeploy'] = codedeploy_connect(args.region)
    connections['ec2'] = ec2_connect(args.region)
    connections['iam'] = iam_connect(args.region)
    connections['main_s3'] = s3_connect(MAIN_S3_BUCKET_REGION)
    connections['s3'] = s3_connect(args.region)
    if args.action == "test":
        #  Test pieces here
        sys.exit(0)
    if args.action == "build":
        if not args.locations:
            print "Please provide at least one IP Address."
            parser.print_help()
            sys.exit(1)
        build(connections, args.region, args.locations, args.hash_id, full)
    elif args.action == "destroy":
        destroy(connections, args.region)
 def __init__(self, input_queue, work_folder, bucket, prefix, region,
              aws_cred):
     self.input_queue_name = input_queue
     self.work_folder = work_folder
     self.data_folder = os.path.join(work_folder, 'data')
     self.bucket_name = bucket
     self.prefix = prefix
     self.region = region
     self.aws_cred = aws_cred
     self.analysis_bucket_name = "jonasfj-telemetry-analysis"
     if self.prefix != '' and not self.prefix.endswith('/'):
         self.prefix += '/'
     # Clear the work folder
     shutil.rmtree(self.work_folder, ignore_errors=True)
     self.s3 = s3_connect(self.region, **self.aws_cred)
     self.bucket = self.s3.get_bucket(self.bucket_name, validate=False)
     self.analysis_bucket = self.s3.get_bucket(self.analysis_bucket_name,
                                               validate=False)
     mkdirp(self.data_folder)
     self.cache_folder = os.path.join(self.work_folder, "cache")
     mkdirp(self.cache_folder)
     self.files_missing_path = os.path.join(self.work_folder,
                                            'FILES_MISSING')
     self.files_processed_path = os.path.join(self.work_folder,
                                              'FILES_PROCESSED')
     self.get_file('FILES_PROCESSED', self.files_processed_path)
     self.get_file('FILES_MISSING', self.files_missing_path)
Example 3
def prepare_bootstrap(manifest, build_server):
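    # Setup/teardown generator: for S3-backed volumes, create the bucket before
    # yielding and delete it (and all of its keys) afterwards. Presumably wrapped
    # with @contextlib.contextmanager at the call site (decorator not shown here).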
    if manifest.volume['backing'] == 's3':
        credentials = {
            'access-key':
            build_server.build_settings['ec2-credentials']['access-key'],
            'secret-key':
            build_server.build_settings['ec2-credentials']['secret-key']
        }
        from boto.s3 import connect_to_region as s3_connect
        s3_connection = s3_connect(
            manifest.image['region'],
            aws_access_key_id=credentials['access-key'],
            aws_secret_access_key=credentials['secret-key'])
        log.debug('Creating S3 bucket')
        bucket = s3_connection.create_bucket(manifest.image['bucket'],
                                             location=manifest.image['region'])
        try:
            yield
        finally:
            log.debug('Deleting S3 bucket')
            for item in bucket.list():
                bucket.delete_key(item.key)
            s3_connection.delete_bucket(manifest.image['bucket'])
    else:
        yield
Example 4
def main():
    if sys.version_info[:3] > (2, 7, 8):
        print "There is currently an SSL issue with Python 2.7.9 and newer."
        print "Please setup a virtualenv with Python 2.7.8 or less to proceed."
        sys.exit(1)
    new_hash = hashlib.md5(str(time.time())).hexdigest()[:8]
    parser = argparse.ArgumentParser()
    parser.add_argument("action", choices=ALLOWED_ACTIONS, action="store",
                        help="Action to take against the stack(s)")
    parser.add_argument("-l", "--location", nargs='*', action="store",
                        dest="locations", help="""If building, provide the
                        IP Address(es) from which ssh is allowed.\n
                        Example: './go.py build -l xx.xx.xx.xx yy.yy.yy.yy'""",
                        type=ip_address_type, default=["0.0.0.0"])
    parser.add_argument('--region', action="store", dest="region",
                        default=DEFAULT_REGION)
    parser.add_argument('--hash', action="store", dest="hash_id",
                        help="""Define the hash to use for multiple
                        deployments.  If left blank, the hash will be
                        generated.""", default=new_hash)
    parser.add_argument('-u', '--user', action="store", dest="jenkins_user",
                        default=JENKINS_USER, help="Username for Jenkins")
    parser.add_argument('-e', '--email', action="store", dest="jenkins_email",
                        default=JENKINS_EMAIL, help="Email for Jenkins")
    parser.add_argument('-p', '--password', action="store_true",
                        dest="password_prompt",
                        help="Prompt for Jenkins Password")
    parser.add_argument('--full', action='store_true',
                        help="Always build all components. (VPC, RDS, etc.)")
    parser.add_argument('--warm', action='store_true',
                        help="Only build VPC, SG, and RDS")
    args = parser.parse_args()
    if args.password_prompt:
        print "WARNING: Password will be passed to CFN in plain text."
        args.jenkins_password = getpass.getpass()
    else:
        args.jenkins_password = JENKINS_PASSWORD
    connections = dict()
    connections['cfn'] = cfn_connect(args.region)
    if args.action == "info":
        info(connections)
        sys.exit(0)
    connections['codedeploy'] = codedeploy_connect(args.region)
    connections['ec2'] = ec2_connect(args.region)
    connections['iam'] = iam_connect(args.region)
    connections['s3'] = s3_connect(args.region)
    if args.action == "test":
        #  Test pieces here
        sys.exit(0)
    if args.action == "build":
        if not args.locations:
            print "Please provide at least one IP Address."
            parser.print_help()
            sys.exit(1)
        build(connections, args)
    elif args.action == "destroy":
        destroy(connections, args)
Example 5
def s3get(input_bucket, prefix, output_folder, decompress, compress, region,
          aws_cred, nb_workers = cpu_count() * 4):
    # Clear output folder if necessary
    shutil.rmtree(output_folder, ignore_errors = True)

    # Sanitize prefix, we always work on folders here
    if prefix != "" and not prefix.endswith('/'):
        prefix += '/'

    # Create queue of work to do
    queue = Queue()

    # Start workers
    downloaders = []
    for i in xrange(0, nb_workers):
        downloader = Downloader(queue, None, input_bucket, decompress, compress,
                                region, aws_cred)
        downloaders.append(downloader)
        downloader.start()

    s3 = s3_connect(region, **aws_cred)
    bucket = s3.get_bucket(input_bucket, validate = False)
    last_key = ''
    retries = 0
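    # List the bucket under the prefix and enqueue (source, target) download tasks;
    # on failure, resume the listing from the last key seen and back off before retrying.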
    while True:
        try:
            for k in bucket.list(prefix = prefix, marker = last_key):
                last_key = k.key
                source_prefix = k.key
                rel_prefix = source_prefix[len(prefix):]
                target_path = os.path.join(output_folder, *rel_prefix.split('/'))
                queue.put((source_prefix, target_path))
            break
        except:
            print >> sys.stderr, "List operation of %s at %s" % (prefix, last_key)
            print_exc(file = sys.stderr)
            retries += 1
            if retries >= NB_RETRIES:
                return False
            else:
                time.sleep(4 * ((retries - 1) ** 2))

    # Add end of queue marker for each worker
    for i in xrange(0, nb_workers):
        queue.put(None)

    # Join workers
    for downloader in downloaders:
        downloader.join()

    # If one of the worker failed, we've failed
    for downloader in downloaders:
        if downloader.exitcode != 0:
            return False

    return True
def collect_garbage(bucket, prefix, cache_folder, region, aws_cred,
                    nb_workers):
    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'

    # Connect to s3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate=False)

    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)

    print "### Collecting Garbage on S3"

    # List prefixes from bucket
    obsolete = []
    current = versions.values()
    for p in s3_bucket.list(prefix=prefix, delimiter='/'):
        if isinstance(p, Prefix):
            if p.name[len(prefix):-1] not in current:
                obsolete.append(p.name)

    # For each obsolete prefix
    for old in obsolete:
        # List objects and delete
        deleted = 0
        keys = []
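        # S3 multi-object delete accepts at most 1000 keys per request, so flush in batches.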
        for k in s3_bucket.list(prefix=old):
            if len(keys) >= 1000:
                try:
                    s3_bucket.delete_keys(keys)
                    deleted += len(keys)
                except:
                    print >> sys.stderr, (
                        "Failed to delete %i objects from %s" %
                        (len(keys), old))
                keys = []
            keys.append(k)
        if len(keys) > 0:
            try:
                s3_bucket.delete_keys(keys)
                deleted += len(keys)
            except:
                print >> sys.stderr, ("Failed to delete %i objects from %s" %
                                      (len(keys), old))
        print " - Deleted %i objects from %s" % (deleted, old)
Example 7
def main():
    new_hash = hashlib.md5(str(time.time())).hexdigest()[:8]
    parser = argparse.ArgumentParser()
    parser.add_argument("action", choices=ALLOWED_ACTIONS, action="store",
                        help="Action to take against the stack(s)")
    parser.add_argument("-l", "--location", nargs='*', action="store",
                        dest="locations", help="""If building, provide the
                        IP Address(es) from which ssh is allowed.\n
                        Example: './go.py build -l xx.xx.xx.xx yy.yy.yy.yy'""",
                        type=ip_address_type, default=["0.0.0.0"])
    parser.add_argument('--region', action="store", dest="region",
                        default=DEFAULT_REGION)
    parser.add_argument('--hash', action="store", dest="hash_id",
                        help="""Define the hash to use for multiple
                        deployments.  If left blank, the hash will be
                        generated.""", default=new_hash)
    parser.add_argument('--full', action='store_true',
                        help="Always build all components. (VPC, RDS, etc.)")
    args = parser.parse_args()
    full = args.full
    connections = dict()
    connections['cfn'] = cfn_connect(args.region)
    if args.action == "info":
        info(connections)
        sys.exit(0)
    connections['codedeploy'] = codedeploy_connect(args.region)
    connections['ec2'] = ec2_connect(args.region)
    connections['iam'] = iam_connect(args.region)
    connections['main_s3'] = s3_connect(MAIN_S3_BUCKET_REGION)
    connections['s3'] = s3_connect(args.region)
    if args.action == "test":
        #  Test pieces here
        sys.exit(0)
    if args.action == "build":
        if not args.locations:
            print "Please provide at least one IP Address."
            parser.print_help()
            sys.exit(1)
        build(connections, args.region, args.locations, args.hash_id, full)
    elif args.action == "destroy":
        destroy(connections, args.region)
def collect_garbage(bucket, prefix, cache_folder, region, aws_cred, nb_workers):
    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'

    # Connect to s3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate = False)

    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)

    print "### Collecting Garbage on S3"

    # List prefixes from bucket
    obsolete = []
    current = versions.values()
    for p in s3_bucket.list(prefix = prefix, delimiter = '/'):
        if isinstance(p, Prefix):
            if p.name[len(prefix):-1] not in current:
                obsolete.append(p.name)

    # For each obsolete prefix
    for old in obsolete:
        # List objects and delete
        deleted = 0
        keys = []
        for k in s3_bucket.list(prefix = old):
            if len(keys) >= 1000:
                try:
                    s3_bucket.delete_keys(keys)
                    deleted += len(keys)
                except:
                    print >> sys.stderr, ("Failed to delete %i objects from %s"
                                          % (len(keys), old))
                keys = []
            keys.append(k)
        if len(keys) > 0:
            try:
                s3_bucket.delete_keys(keys)
                deleted += len(keys)
            except:
                print >> sys.stderr, ("Failed to delete %i objects from %s"
                                      % (len(keys), old))
        print " - Deleted %i objects from %s" % (deleted, old)
Example 9
def prepare_bootstrap(manifest, build_server):
	if manifest.volume['backing'] == 's3':
		credentials = {'access-key': build_server.build_settings['ec2-credentials']['access-key'],
		               'secret-key': build_server.build_settings['ec2-credentials']['secret-key']}
		from boto.s3 import connect_to_region as s3_connect
		s3_connection = s3_connect(manifest.image['region'],
		                           aws_access_key_id=credentials['access-key'],
		                           aws_secret_access_key=credentials['secret-key'])
		log.debug('Creating S3 bucket')
		bucket = s3_connection.create_bucket(manifest.image['bucket'], location=manifest.image['region'])
		try:
			yield
		finally:
			log.debug('Deleting S3 bucket')
			for item in bucket.list():
				bucket.delete_key(item.key)
			s3_connection.delete_bucket(manifest.image['bucket'])
	else:
		yield
Example 10
    def run(self):
        if self.decompress:

            def read(path):
                return gzip.open(path, 'r')
        else:

            def read(path):
                return open(path, 'r')

        s3 = s3_connect(self.region, **self.aws_cred)
        bucket = s3.get_bucket(self.target_bucket, validate=False)
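        # Worker loop: consume (source_file, target_prefix) messages until a None
        # sentinel arrives, uploading each file with quadratic back-off between retries.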
        while True:
            msg = self.queue.get()
            if msg == None:
                break
            source_file, target_prefix = msg
            retries = 0
            while retries < NB_RETRIES:
                try:
                    retries += 1
                    with read(source_file) as f:
                        data = f.read()
                    headers = {'Content-Type': 'application/json'}
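                    # Optionally gzip the payload in memory and advertise it via Content-Encoding.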
                    if self.compress:
                        fobj = StringIO()
                        with gzip.GzipFile(mode='wb', fileobj=fobj) as zobj:
                            zobj.write(data)
                        data = fobj.getvalue()
                        fobj.close()
                        headers['Content-Encoding'] = 'gzip'
                    # Put to S3
                    k = Key(bucket)
                    k.key = target_prefix
                    k.set_contents_from_string(data, headers=headers)
                    break
                except:
                    print >> sys.stderr, "Failed to upload %s to %s" % msg
                    print_exc(file=sys.stderr)
                    time.sleep((retries - 1)**2)
            if retries >= NB_RETRIES:
                sys.exit(1)
        s3.close()
Example 11
 def run(self):
     if self.decompress:
         def read(path):
             return gzip.open(path, 'r')
     else:
         def read(path):
             return open(path, 'r')
     s3 = s3_connect(self.region, **self.aws_cred)
     bucket = s3.get_bucket(self.target_bucket, validate = False)
     while True:
         msg = self.queue.get()
         if msg == None:
             break
         source_file, target_prefix = msg
         retries = 0
         while retries < NB_RETRIES:
             try:
                 retries += 1
                 with read(source_file) as f:
                     data = f.read()
                 headers = {
                     'Content-Type':     'application/json'
                 }
                 if self.compress:
                     fobj = StringIO()
                     with gzip.GzipFile(mode = 'wb', fileobj = fobj) as zobj:
                         zobj.write(data)
                     data = fobj.getvalue()
                     fobj.close()
                     headers['Content-Encoding'] = 'gzip'
                 # Put to S3
                 k = Key(bucket)
                 k.key = target_prefix
                 k.set_contents_from_string(data, headers = headers)
                 break
             except:
                 print >> sys.stderr, "Failed to upload %s to %s" % msg
                 print_exc(file = sys.stderr)
                 time.sleep((retries - 1) ** 2)
         if retries >= NB_RETRIES:
             sys.exit(1)
     s3.close()
Example 12
def s3get(input_bucket, prefix, output_folder, decompress, compress, region,
          aws_cred, nb_workers = cpu_count() * 4):
    # Clear output folder if necessary
    shutil.rmtree(output_folder, ignore_errors = True)

    # Sanitize prefix, we always work on folders here
    if prefix != "" and not prefix.endswith('/'):
        prefix += '/'

    # Create queue of work to do
    queue = Queue()

    # Start workers
    downloaders = []
    for i in xrange(0, nb_workers):
        downloader = Downloader(queue, None, input_bucket, decompress, compress,
                                region, aws_cred)
        downloaders.append(downloader)
        downloader.start()

    s3 = s3_connect(region, **aws_cred)
    bucket = s3.get_bucket(input_bucket, validate = False)
    for k in bucket.list(prefix = prefix):
        source_prefix = k.key
        rel_prefix = source_prefix[len(prefix):]
        target_path = os.path.join(output_folder, *rel_prefix.split('/'))
        queue.put((source_prefix, target_path))

    # Add end of queue marker for each worker
    for i in xrange(0, nb_workers):
        queue.put(None)

    # Join workers
    for downloader in downloaders:
        downloader.join()

    # If one of the worker failed, we've failed
    for downloader in downloaders:
        if downloader.exitcode != 0:
            return False

    return True
Example 13
 def run(self):
     if self.compress:
         def write(path):
             return gzip.open(path, 'w')
     else:
         def write(path):
             return open(path, 'w')
     s3 = s3_connect(self.region, **self.aws_cred)
     bucket = s3.get_bucket(self.input_bucket, validate = False)
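     # Worker loop: consume (source_prefix, target_path) messages until a None
     # sentinel arrives, downloading each key with back-off between retries.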
     while True:
         msg = self.queue.get()
         if msg == None:
             break
         source_prefix, target_path = msg
         retries = 0
         while retries < NB_RETRIES:
             try:
                 retries += 1
                 k = Key(bucket)
                 k.key = source_prefix
                 data = k.get_contents_as_string()
                 if self.decompress:
                     fobj = StringIO(data)
                     with gzip.GzipFile(mode = 'rb', fileobj = fobj) as zobj:
                         data = zobj.read()
                     fobj.close()
                 # Create target folder
                 mkdirp(os.path.dirname(target_path))
                 with write(target_path) as f:
                     f.write(data)
                 break
             except:
                 print >> sys.stderr, "Failed to download %s to %s" % msg
                 print_exc(file = sys.stderr)
                 time.sleep(4 * ((retries - 1) ** 2))
         if retries >= NB_RETRIES:
             sys.exit(1)
         if self.output_queue != None:
             self.output_queue.put(target_path)
     s3.close()
Example 14
 def __init__(self, input_queue, work_folder, bucket, prefix, region, aws_cred):
     self.input_queue_name       = input_queue
     self.work_folder            = work_folder
     self.data_folder            = os.path.join(work_folder, 'data')
     self.bucket_name            = bucket
     self.prefix                 = prefix
     self.region                 = region
     self.aws_cred               = aws_cred
     self.analysis_bucket_name   = "jonasfj-telemetry-analysis"
     if self.prefix != '' and not self.prefix.endswith('/'):
         self.prefix += '/'
     # Clear the work folder
     shutil.rmtree(self.work_folder, ignore_errors = True)
     self.s3 = s3_connect(self.region, **self.aws_cred)
     self.bucket = self.s3.get_bucket(self.bucket_name, validate = False)
     self.analysis_bucket = self.s3.get_bucket(self.analysis_bucket_name,
                                               validate = False)
     mkdirp(self.data_folder)
     self.cache_folder = os.path.join(self.work_folder, "cache")
     mkdirp(self.cache_folder)
     self.files_missing_path = os.path.join(self.work_folder, 'FILES_MISSING')
     self.files_processed_path = os.path.join(self.work_folder, 'FILES_PROCESSED')
     self.get_file('FILES_PROCESSED', self.files_processed_path)
     self.get_file('FILES_MISSING', self.files_missing_path)
def updateresults(input_folder, work_folder, bucket, prefix, cache_folder,
                  region, aws_cred, nb_workers):
    # Find input files
    input_files = []
    for path, folders, files in os.walk(input_folder):
        for f in files:
            # Get channel version
            cv = os.path.relpath(os.path.join(path, f), input_folder)
            input_files.append(cv)

    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'

    # Connect to s3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate=False)

    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)

    # Update results in bucket
    for channel_version in input_files:
        print "### Updating: " + channel_version

        # Download all files for channel_version to disk
        rmtree(work_folder, ignore_errors=True)
        data_folder = os.path.join(work_folder, channel_version)
        mkdirp(data_folder)
        snapshot = versions.get(channel_version, None)
        if snapshot:
            fetched = False
            while not fetched:
                fetched = s3get(bucket, prefix + snapshot, data_folder, True,
                                False, region, aws_cred)
                if not fetched:
                    print >> sys.stderr, "Failed to download %s" % snapshot
                    sleep(5 * 60)
            print " - downloaded " + snapshot

        # Create ChannelVersionManager
        channel, version = channel_version.split('/')
        manager = ChannelVersionManager(work_folder, channel, version, False,
                                        False, False)

        # Feed it with rows from input_file
        rows = 0
        with open(os.path.join(input_folder, channel_version), 'r') as f:
            for line in f:
                try:
                    filePath, blob = line.split('\t')
                    channel_, version_, measure, byDateType = filePath.split(
                        '/')
                    blob = json.loads(blob)
                    if channel_ != channel or version_ != version:
                        print >> sys.stderr, (
                            "Error: Found %s/%s within a %s file!" %
                            (channel_, version_, channel_version))
                        continue
                    manager.merge_in_blob(measure, byDateType, blob)
                    rows += 1
                except:
                    print >> sys.stderr, "Error while handling row:"
                    print_exc(file=sys.stderr)
        manager.flush()

        print " - merged rows %i" % rows

        # Upload updated files to S3
        date = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        cv_prefix = "%s-%s-%s" % (date, version, channel)
        uploaded = False
        while not uploaded:
            uploaded = s3put(data_folder, bucket, prefix + cv_prefix, False,
                             True, region, aws_cred, nb_workers)
            if not uploaded:
                print >> sys.stderr, "Failed to upload '%s'" % cv_prefix
                sleep(5 * 60)

        print " - uploaded to " + cv_prefix

        # Store changes in versions
        versions[channel_version] = cv_prefix

    # Upload new versions.json and write to cache
    s3put_json(s3_bucket, prefix + 'versions.json', True, versions)
    with open(versions_json, 'w') as f:
        json.dump(versions, f)

    print "### New snapshot uploaded"

    try:
        # Garbage collect old channel/version folders on S3
        collect_garbage(bucket, prefix, cache_folder, region, aws_cred,
                        nb_workers)
    except:
        print >> sys.stderr, "Failed to collect garbage on S3"
Example 16
def updateresults(input_folder, work_folder, bucket, prefix, cache_folder,
                  region, aws_cred, nb_workers):
    # Find input files
    input_files = []
    for path, folders, files in os.walk(input_folder):
        for f in files:
            # Get channel version
            cv = os.path.relpath(os.path.join(path, f), input_folder)
            input_files.append(cv)

    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'

    # Connect to s3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate = False)

    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)

    # Update results in bucket
    for channel_version in input_files:
        print "### Updating: " + channel_version

        # Download all files for channel_version to disk
        rmtree(work_folder, ignore_errors = True)
        data_folder = os.path.join(work_folder, channel_version)
        mkdirp(data_folder)
        snapshot = versions.get(channel_version, None)
        if snapshot:
            fetched = False
            while not fetched:
                fetched = s3get(bucket, prefix + snapshot, data_folder, True,
                                False, region, aws_cred)
                if not fetched:
                    print >> sys.stderr, "Failed to download %s" % snapshot
                    sleep(5 * 60)
            print " - downloaded " + snapshot

        # Create ChannelVersionManager
        channel, version = channel_version.split('/')
        manager = ChannelVersionManager(work_folder, channel, version, False,
                  False, False)

        # Feed it with rows from input_file
        rows = 0
        with open(os.path.join(input_folder, channel_version), 'r') as f:
            for line in f:
                try:
                    filePath, blob = line.split('\t')
                    channel_, version_, measure, byDateType = filePath.split('/')
                    blob = json.loads(blob)
                    if channel_ != channel or version_ != version:
                        print >> sys.stderr, ("Error: Found %s/%s within a %s file!"
                                            % (channel_, version_, channel_version))
                        continue
                    manager.merge_in_blob(measure, byDateType, blob)
                    rows += 1
                except:
                    print >> sys.stderr, "Error while handling row:"
                    print_exc(file = sys.stderr)
        manager.flush()

        print " - merged rows %i" % rows

        # Upload updated files to S3
        date = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        cv_prefix = "%s-%s-%s" % (date, version, channel)
        uploaded = False
        while not uploaded:
            uploaded = s3put(data_folder, bucket, prefix + cv_prefix, False,
                             True, region, aws_cred, nb_workers)
            if not uploaded:
                print >> sys.stderr, "Failed to upload '%s'" % cv_prefix
                sleep(5 * 60)

        print " - uploaded to " + cv_prefix

        # Store changes in versions
        versions[channel_version] = cv_prefix

    # Upload new versions.json and write to cache
    s3put_json(s3_bucket, prefix + 'versions.json', True, versions)
    with open(versions_json, 'w') as f:
        json.dump(versions, f)

    print "### New snapshot uploaded"

    try:
        # Garbage collect old channel/version folders on S3
        collect_garbage(bucket, prefix, cache_folder, region, aws_cred, nb_workers)
    except:
        print >> sys.stderr, "Failed to collect garbage on S3"
Example 17
from uuid import uuid4
from sqlalchemy import create_engine, MetaData
from sqlalchemy.sql import select, func
from subprocess import check_output, CalledProcessError
from tempfile import mkstemp
import crontab
import json

# Create flask app
app = Flask(__name__)
app.config.from_object('config')

# Connect to AWS
ec2 = ec2_connect(app.config['AWS_REGION'])
ses = ses_connect('us-east-1')  # only supported region!
s3 = s3_connect(app.config['AWS_REGION'])
bucket = s3.get_bucket(app.config['TEMPORARY_BUCKET'], validate=False)
code_bucket = s3.get_bucket(app.config['CODE_BUCKET'], validate=False)

# Create login manager
login_manager = LoginManager()
login_manager.anonymous_user = AnonymousUser

# Initialize browser id login
browser_id = BrowserID()

# Cron-related constants:
CRON_IDX_MIN = 0
CRON_IDX_HOUR = 1
CRON_IDX_DOM = 2
CRON_IDX_MON = 3
Example 18
from flask.ext.browserid import BrowserID
from user import User, AnonymousUser
from boto.ec2 import connect_to_region as ec2_connect
from boto.ses import connect_to_region as ses_connect
from boto.s3 import connect_to_region as s3_connect
from urlparse import urljoin
from uuid import uuid4

# Create flask app
app = Flask(__name__)
app.config.from_object("config")

# Connect to AWS
ec2 = ec2_connect(app.config["AWS_REGION"])
ses = ses_connect("us-east-1")  # only supported region!
s3 = s3_connect(app.config["AWS_REGION"])
bucket = s3.get_bucket(app.config["TEMPORARY_BUCKET"], validate=False)

# Create login manager
login_manager = LoginManager()
login_manager.anonymous_user = AnonymousUser

# Initialize browser id login
browser_id = BrowserID()


def abs_url_for(rule, **options):
    return urljoin(request.url_root, url_for(rule, **options))


@browser_id.user_loader
Example 19
from subprocess import check_output, CalledProcessError
from tempfile import mkstemp
import crontab
import json
import re
import os.path

# Create flask app
app = Flask(__name__)
app.config.from_object('config')

# Connect to AWS
emr  = emr_connect(app.config['AWS_REGION'])
ec2 = ec2_connect(app.config['AWS_REGION'])
ses = ses_connect(app.config['AWS_REGION'])
s3  = s3_connect(app.config['AWS_REGION'])
bucket = s3.get_bucket(app.config['TEMPORARY_BUCKET'], validate = False)
code_bucket = s3.get_bucket(app.config['CODE_BUCKET'], validate = False)

# Create login manager
login_manager = LoginManager()
login_manager.anonymous_user = AnonymousUser

# Initialize browser id login
browser_id = BrowserID()

# Cron-related constants:
CRON_IDX_MIN  = 0
CRON_IDX_HOUR = 1
CRON_IDX_DOM  = 2
CRON_IDX_MON  = 3
Example 20
 def setup_s3(self):
   self.s3 = s3_connect(self.s3_region)
   self.bucket = self.s3.get_bucket(self.s3_bucket)
Example 21
 def setup_s3(self):
     self.s3 = s3_connect(self.s3_region)
     self.bucket = self.s3.get_bucket(self.s3_bucket)