Example #1
def __init__(self, ctx):
    super(S3Output, self).__init__(ctx)
    # Hour-resolution date path used to partition output keys
    self.date_path = datetime.datetime.now().strftime('%Y/%m/%d/%H')
    # Split the configured s3:// output path into (path, bucket, key_prefix)
    self.s3_path, self.bucket, self.key_prefix = parse_s3(
        self.ctx.output_path)
    # Local staging directory for output files
    self.root_dir = tempfile.mkdtemp()
    self.transfer = None
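This constructor pattern recurs below: parse_s3 splits an s3:// output path into (full path, bucket, key prefix), tempfile.mkdtemp() creates a local staging directory, and self.transfer stays None until an upload actually needs an S3Transfer (see Example #3).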
Example #2
def __init__(self, ctx, config):
    self.ctx = ctx
    self.config = config
    self.output_path = self.get_output_path(self.config['url'])
    # Split the resolved output path into (path, bucket, key_prefix)
    self.s3_path, self.bucket, self.key_prefix = utils.parse_s3(
        self.output_path)
    # Local staging directory for output files
    self.root_dir = tempfile.mkdtemp()
    self.transfer = None
Example #3
def _upload_func(self, s3_uri, func, archive):
    _, bucket, key_prefix = parse_s3(s3_uri)
    key = "%s/%s" % (key_prefix, func.name)
    # Switch to multipart uploads for archives larger than 4 MiB
    transfer = S3Transfer(
        self.session_factory().client('s3'),
        config=TransferConfig(multipart_threshold=1024 * 1024 * 4))
    transfer.upload_file(
        archive.path,
        bucket=bucket,
        key=key,
        extra_args={'ServerSideEncryption': 'AES256'})
    return bucket, key
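The same upload pattern works outside a class; a minimal standalone sketch, assuming default AWS credentials and made-up bucket, key, and archive names:

import boto3
from boto3.s3.transfer import S3Transfer, TransferConfig

# Multipart uploads kick in above 4 MiB, matching the example above
transfer = S3Transfer(
    boto3.client('s3'),
    config=TransferConfig(multipart_threshold=4 * 1024 * 1024))
transfer.upload_file(
    '/tmp/function.zip',   # hypothetical archive path
    bucket='my-bucket',    # hypothetical bucket
    key='lambda/my-func',  # hypothetical key prefix + function name
    extra_args={'ServerSideEncryption': 'AES256'})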
Example #4
def test_parse_s3(self):
    self.assertRaises(ValueError, utils.parse_s3, 'bogus')
    self.assertEqual(
        utils.parse_s3('s3://things'),
        ('s3://things', 'things', ''))
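The test pins down the parse_s3 contract: a non-s3 path raises ValueError, and an s3:// URI comes back as (full path, bucket, key prefix). A minimal sketch consistent with those assertions (not necessarily the project's real implementation):

def parse_s3(output_path):
    # Reject anything that is not an s3:// URI, per the test
    if not output_path.startswith('s3://'):
        raise ValueError('output path must be an s3:// URI')
    # 's3://things' -> bucket 'things', empty key prefix
    bucket, _, key_prefix = output_path[len('s3://'):].partition('/')
    return output_path, bucket, key_prefix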
Example #5
# The names ArgumentError and parse_s3 are assumed to be provided by the
# surrounding project; the remaining imports are standard.
import logging

from boto3 import Session
from botocore.exceptions import ClientError

log = logging.getLogger(__name__)


def s3_rename(output_dir, old, new, sse_kms_key_id):
    # Move the old data into the new area
    session = Session()
    client = session.client('s3')
    s3 = session.resource('s3')
    s3_path, bucket, key_prefix = parse_s3(output_dir)

    # Ensure bucket exists
    try:
        client.head_bucket(Bucket=bucket)
    except ClientError:
        raise ArgumentError('S3 bucket {} does not exist.'.format(bucket))

    log.info(
        'Retrieving list of S3 objects to rename in bucket "{}"'.format(
            bucket
        )
    )
    paginator = client.get_paginator('list_objects_v2')
    rename_iterator = paginator.paginate(Bucket=bucket, Prefix=old + '/')
    obj_count = 0

    for page in rename_iterator:
        # loop through the pages of results renaming

        if page.get('Contents') is None:
            raise ArgumentError('Key {} does not exist in bucket {}'.format(
                old, bucket))

        # Loop through the old objects copying and deleting
        for obj in page.get('Contents'):
            old_key = obj.get('Key')
            old_meta = client.head_object(Bucket=bucket, Key=old_key)
            old_sse_type = old_meta.get('ServerSideEncryption')
            old_sse_key = old_meta.get('SSEKMSKeyId')
            new_key = new + old_key[len(old):]

            # check that we haven't already run and have existing data
            # in the new key
            new_obj = s3.Object(bucket, new_key)
            if new_key == old_key:
                log.debug(('Old and new keys match and a new SSEKMSKeyId '
                           'was specified, re-encrypting {}').format(
                               new_obj.key))
            else:
                try:
                    # If the destination already exists from a previous
                    # run, leave it alone
                    new_obj.load()
                    log.info('Skipping existing output in new '
                             'location: {}'.format(new_obj.key))
                    continue
                except ClientError as e:
                    response_code = e.response.get('Error').get('Code')
                    if response_code == '404':
                        # The destination does not exist yet, so copy
                        # the existing object into the new spot
                        pass
                    else:
                        raise

            copy_from_args = dict(
                CopySource={'Bucket': bucket, 'Key': old_key},
                # Carry the object's metadata over with its contents;
                # a copy_object parameter, not part of CopySource
                MetadataDirective='COPY')

            if sse_kms_key_id:
                # Re-encrypt with the new key
                copy_from_args['ServerSideEncryption'] = 'aws:kms'
                copy_from_args['SSEKMSKeyId'] = sse_kms_key_id
            elif old_sse_type == 'aws:kms':
                # Re-encrypt with the existing KMS key
                copy_from_args['ServerSideEncryption'] = 'aws:kms'
                copy_from_args['SSEKMSKeyId'] = old_sse_key
            elif old_sse_type == 'AES256':
                # Re-encrypt with the existing AES256 encryption
                copy_from_args['ServerSideEncryption'] = 'AES256'

            new_obj.copy_from(**copy_from_args)
            log.debug('Renamed "{}" to "{}"'.format(old_key, new_key))
            # Delete the old object unless we are re-encrypting in place
            if new_key != old_key:
                s3.Object(bucket, old_key).delete()
                log.debug('Deleted "{}"'.format(old_key))
            obj_count += 1

    log.info('Finished renaming/re-encrypting '
             '{} objects'.format(obj_count))
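A hypothetical invocation; the bucket, prefixes, and key id below are made up, and AWS credentials are assumed to be configured in the environment:

# Rename everything under 'old-reports/' to 'new-reports/' in the
# bucket, re-encrypting each object with the given KMS key
s3_rename(
    output_dir='s3://my-bucket',
    old='old-reports',
    new='new-reports',
    sse_kms_key_id='1234abcd-12ab-34cd-56ef-1234567890ab')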