def __init__(self, ctx):
    """Prepare a local staging area and the S3 destination for output.

    Output is staged under a fresh temporary directory and keyed by an
    hour-granularity date path derived from the current local time.
    """
    super(S3Output, self).__init__(ctx)
    # Partition uploaded output by hour, e.g. 2020/01/31/23.
    self.date_path = datetime.datetime.now().strftime('%Y/%m/%d/%H')
    parsed = parse_s3(self.ctx.output_path)
    self.s3_path, self.bucket, self.key_prefix = parsed
    # Local scratch directory for files prior to upload.
    self.root_dir = tempfile.mkdtemp()
    self.transfer = None
def __init__(self, ctx):
    """Set up temp-dir staging and parse the S3 output destination."""
    super(S3Output, self).__init__(ctx)
    now = datetime.datetime.now()
    # Hour-level date prefix used when laying out output keys.
    self.date_path = now.strftime('%Y/%m/%d/%H')
    self.s3_path, self.bucket, self.key_prefix = parse_s3(
        self.ctx.output_path)
    self.root_dir = tempfile.mkdtemp()
    # Transfer manager slot; set to None here, presumably populated
    # later by the upload path — verify against callers.
    self.transfer = None
def __init__(self, ctx, config):
    """Record context/config and stage output under a temp directory.

    The destination is derived from ``config['url']`` via
    ``get_output_path`` and split into its S3 components.
    """
    self.ctx = ctx
    self.config = config
    self.output_path = self.get_output_path(self.config['url'])
    s3_info = utils.parse_s3(self.output_path)
    self.s3_path, self.bucket, self.key_prefix = s3_info
    # Local scratch directory for files prior to upload.
    self.root_dir = tempfile.mkdtemp()
    self.transfer = None
def __init__(self, ctx, config):
    """Initialize from *ctx*/*config*, resolving the S3 output target."""
    self.ctx = ctx
    self.config = config
    # Resolve the configured URL into a concrete output path first,
    # then decompose it into (full path, bucket, key prefix).
    self.output_path = self.get_output_path(self.config['url'])
    (self.s3_path,
     self.bucket,
     self.key_prefix) = utils.parse_s3(self.output_path)
    self.root_dir = tempfile.mkdtemp()
    self.transfer = None
def _upload_func(self, s3_uri, func, archive):
    """Upload the packaged *archive* for *func* beneath *s3_uri*.

    Returns the ``(bucket, key)`` of the uploaded object.
    """
    _, bucket, key_prefix = parse_s3(s3_uri)
    key = "%s/%s" % (key_prefix, func.name)
    # Switch to multipart uploads past a 4MB threshold.
    xfer_config = TransferConfig(multipart_threshold=1024 * 1024 * 4)
    transfer = S3Transfer(
        self.session_factory().client('s3'), config=xfer_config)
    # Encrypt the object at rest (SSE-S3).
    transfer.upload_file(
        archive.path,
        bucket=bucket,
        key=key,
        extra_args={'ServerSideEncryption': 'AES256'})
    return bucket, key
def _upload_func(self, s3_uri, func, archive):
    """Push *archive* to S3 under *s3_uri*/<func name>; return (bucket, key)."""
    _, bucket, key_prefix = parse_s3(s3_uri)
    key = "%s/%s" % (key_prefix, func.name)
    client = self.session_factory().client('s3')
    transfer = S3Transfer(
        client,
        # Multipart kicks in for payloads over 4MB.
        config=TransferConfig(multipart_threshold=1024 * 1024 * 4))
    extra = {'ServerSideEncryption': 'AES256'}
    transfer.upload_file(archive.path, bucket=bucket, key=key,
                         extra_args=extra)
    return bucket, key
def test_parse_s3(self):
    """parse_s3 rejects non-s3 URIs and splits a bare bucket URI."""
    with self.assertRaises(ValueError):
        utils.parse_s3("bogus")
    parsed = utils.parse_s3("s3://things")
    self.assertEqual(parsed, ("s3://things", "things", ""))
def test_parse_s3(self):
    """A non-URI raises; a bucket-only URI yields an empty key prefix."""
    with self.assertRaises(ValueError):
        utils.parse_s3('bogus')
    self.assertEqual(
        ('s3://things', 'things', ''),
        utils.parse_s3('s3://things'))
def s3_rename(output_dir, old, new, sse_kms_key_id):
    """Rename every S3 object under the *old* key prefix to *new*.

    Each object is copied to its new key (re-encrypted as needed) and
    the original deleted.  When the old and new keys are identical the
    object is re-encrypted in place instead of being moved.

    :param output_dir: s3:// URI whose bucket contains the objects.
    :param old: existing key prefix to rename from.
    :param new: key prefix to rename to.
    :param sse_kms_key_id: KMS key id to re-encrypt with; falsy to
        preserve each object's existing encryption settings.
    :raises ArgumentError: if the bucket or the old prefix is missing.
    """
    # move the old data into the new area
    session = Session()
    client = session.client('s3')
    s3 = session.resource('s3')
    s3_path, bucket, key_prefix = parse_s3(output_dir)

    # Ensure bucket exists
    try:
        client.head_bucket(Bucket=bucket)
    except ClientError:
        raise ArgumentError('S3 bucket {} does not exist.'.format(bucket))

    log.info(
        'Retrieving list of S3 objects to rename in bucket "{}"'.format(
            bucket
        )
    )
    paginator = client.get_paginator('list_objects_v2')
    rename_iterator = paginator.paginate(Bucket=bucket, Prefix=old + '/')

    obj_count = 0

    # loop through the pages of results renaming
    for page in rename_iterator:
        if page.get('Contents') is None:
            raise ArgumentError('Key {} does not exist in bucket {}'.format(
                old, bucket))

        # Loop through the old objects copying and deleting
        for obj in page.get('Contents'):
            old_key = obj.get('Key')
            old_meta = client.head_object(Bucket=bucket, Key=old_key)
            old_sse_type = old_meta.get('ServerSideEncryption')
            old_sse_key = old_meta.get('SSEKMSKeyId')
            new_key = new + old_key[len(old):]

            # check that we haven't already run and have existing data
            # in the new key
            new_obj = s3.Object(bucket, new_key)
            if new_key == old_key:
                log.debug(('Old and new keys match and new SSEKMSKeyId '
                           'Specified, re-encrypting {}').format(new_obj.key))
            else:
                try:
                    new_obj.load()
                    if new_key != old_key:
                        log.info('Skipping existing output in new '
                                 'location: {}'.format(new_obj.key))
                        continue
                except ClientError as e:
                    response_code = e.response.get('Error').get('Code')
                    if response_code == '404':
                        # the obj doesn't exist so we will copy
                        # the existing obj to the new spot
                        pass
                    else:
                        raise

            # FIX: MetadataDirective is a top-level copy_from() argument
            # per the boto3 API, not a CopySource field; nested inside
            # CopySource it was silently ignored.  COPY is also boto3's
            # default, so this is behavior-compatible.
            copy_from_args = dict(
                CopySource={'Bucket': bucket, 'Key': old_key},
                MetadataDirective='COPY')

            if sse_kms_key_id:
                # Re-encrypt with a new key
                copy_from_args['ServerSideEncryption'] = 'aws:kms'
                copy_from_args['SSEKMSKeyId'] = sse_kms_key_id
            if not sse_kms_key_id and old_sse_type == 'aws:kms':
                # Re-encrypt with the existing key
                copy_from_args['ServerSideEncryption'] = 'aws:kms'
                copy_from_args['SSEKMSKeyId'] = old_sse_key
            if not sse_kms_key_id and old_sse_type == 'AES256':
                # Re-encrypt with the existing AES256
                copy_from_args['ServerSideEncryption'] = 'AES256'

            new_obj.copy_from(**copy_from_args)
            log.debug('Renamed "{}" to "{}"'.format(old_key, new_key))

            # Either way, we delete the old object unless we are inplace
            # re-encrypting
            if new_key != old_key:
                s3.Object(bucket, old_key).delete()
                log.debug('Deleted "{}"'.format(old_key))

            obj_count += 1

    log.info(('Finished renaming/re-encrypting '
              '{} objects').format(obj_count))
def test_parse_s3(self):
    """Invalid input raises ValueError; bucket-only URIs parse cleanly."""
    self.assertRaises(ValueError, utils.parse_s3, 'bogus')
    result = utils.parse_s3('s3://things')
    self.assertEqual(result, ('s3://things', 'things', ''))
def s3_rename(output_dir, old, new, sse_kms_key_id):
    """Move every object under the *old* key prefix to the *new* prefix.

    Objects are copied to their new key (re-encrypted as configured)
    and the originals removed; when a key maps onto itself the object
    is re-encrypted in place rather than moved.
    """
    # Relocate the existing data under the new prefix.
    sess = Session()
    s3_client = sess.client('s3')
    s3_res = sess.resource('s3')
    _path, bucket, _prefix = parse_s3(output_dir)

    # Bail out early if the bucket itself is missing.
    try:
        s3_client.head_bucket(Bucket=bucket)
    except ClientError:
        raise ArgumentError('S3 bucket {} does not exist.'.format(bucket))

    log.info(
        'Retrieving list of S3 objects to rename in bucket "{}"'.format(
            bucket
        )
    )
    pages = s3_client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket, Prefix=old + '/')

    renamed = 0
    for listing in pages:
        contents = listing.get('Contents')
        if contents is None:
            raise ArgumentError('Key {} does not exist in bucket {}'.format(
                old, bucket))

        for entry in contents:
            src_key = entry.get('Key')
            src_meta = s3_client.head_object(Bucket=bucket, Key=src_key)
            src_sse_type = src_meta.get('ServerSideEncryption')
            src_sse_key = src_meta.get('SSEKMSKeyId')
            dst_key = new + src_key[len(old):]

            # Guard against re-running: existing data at the new key is
            # skipped instead of overwritten.
            dst_obj = s3_res.Object(bucket, dst_key)
            if dst_key == src_key:
                log.debug(('Old and new keys match and new SSEKMSKeyId '
                           'Specified, re-encrypting {}').format(dst_obj.key))
            else:
                try:
                    dst_obj.load()
                    if dst_key != src_key:
                        log.info('Skipping existing output in new '
                                 'location: {}'.format(dst_obj.key))
                        continue
                except ClientError as e:
                    code = e.response.get('Error').get('Code')
                    if code != '404':
                        raise
                    # 404: nothing at the destination yet, proceed to copy.

            copy_from_args = dict(
                CopySource={
                    'Bucket': bucket,
                    'Key': src_key,
                    'MetadataDirective': 'COPY'
                })
            if sse_kms_key_id:
                # Re-encrypt under the newly supplied key.
                copy_from_args['ServerSideEncryption'] = 'aws:kms'
                copy_from_args['SSEKMSKeyId'] = sse_kms_key_id
            elif src_sse_type == 'aws:kms':
                # No new key given: keep the object's existing KMS key.
                copy_from_args['ServerSideEncryption'] = 'aws:kms'
                copy_from_args['SSEKMSKeyId'] = src_sse_key
            elif src_sse_type == 'AES256':
                # No new key given: keep the existing AES256 setting.
                copy_from_args['ServerSideEncryption'] = 'AES256'

            dst_obj.copy_from(**copy_from_args)
            log.debug('Renamed "{}" to "{}"'.format(src_key, dst_key))

            # Delete the source unless this was an in-place re-encryption.
            if dst_key != src_key:
                s3_res.Object(bucket, src_key).delete()
                log.debug('Deleted "{}"'.format(src_key))

            renamed += 1

    log.info(('Finished renaming/re-encrypting '
              '{} objects').format(renamed))