def download_fileobj(file_handle, bucket, key, region):
    """Downloads the requested S3 object and saves it into the given file handle.

    This method also returns the downloaded payload.

    Args:
        file_handle (File): A file-like object to save the downloaded contents
        bucket (str): AWS S3 bucket name
        key (str): AWS S3 key name
        region (str): AWS region

    Returns:
        str: The downloaded payload

    Raises:
        ClientError
    """
    try:
        client = boto3.client('s3', config=default_config(region=region))
        client.download_fileobj(bucket, key, file_handle)
        file_handle.seek(0)
        return file_handle.read()
    except ClientError:
        LOGGER.error('An error occurred during S3 DownloadFileobj')
        raise
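# Usage sketch for download_fileobj (not part of the original module; the
# bucket, key, and region values below are hypothetical). An io.BytesIO buffer
# works as the file handle, since only write/seek/read are needed; the handle
# is rewound before reading, so the full payload is returned.
def _example_download_fileobj():
    import io
    handle = io.BytesIO()
    return download_fileobj(handle, 'example-bucket', 'path/to/object.json', 'us-east-1')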
def encrypt(plaintext_data, region, key_alias):
    """Encrypts the given plaintext data using AWS KMS

    See: https://docs.aws.amazon.com/kms/latest/APIReference/API_Encrypt.html

    Args:
        plaintext_data (str): The raw, unencrypted data to be encrypted
        region (str): AWS region
        key_alias (str): KMS Key Alias

    Returns:
        str: The encrypted ciphertext

    Raises:
        ClientError
    """
    try:
        key_id = 'alias/{}'.format(key_alias)
        client = boto3.client('kms', config=default_config(region=region))
        response = client.encrypt(KeyId=key_id, Plaintext=plaintext_data)
        return response['CiphertextBlob']
    except ClientError:
        LOGGER.error('An error occurred during KMS encryption')
        raise
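# Usage sketch for encrypt (hypothetical alias and region). Note the 'alias/'
# prefix is added internally, so callers pass only the bare alias name.
def _example_encrypt():
    return encrypt('sensitive-value', 'us-east-1', 'example-streamalert-key')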
def __init__(self, database_name, results_bucket, results_prefix, region=None):
    """Initialize the Boto3 Athena Client, and S3 results bucket/key

    Args:
        database_name (str): Athena database name where tables will be queried
        results_bucket (str): S3 bucket in which to store Athena results
        results_prefix (str): S3 key prefix to prepend to results in the bucket
        region (str): AWS region in which to create the Athena client
    """
    self._client = boto3.client(
        'athena', config=boto_helpers.default_config(region=region))
    self.database = database_name.strip()

    results_bucket = results_bucket.strip()

    # Make sure the required 's3://' prefix is included
    if not results_bucket.startswith('s3://'):
        results_bucket = 's3://{}'.format(results_bucket)

    # Produces s3://<bucket>/<prefix>/YYYY/MM/DD result paths
    self._s3_results_path = posixpath.join(
        results_bucket, results_prefix, datetime.utcnow().strftime('%Y/%m/%d'))
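# Worked example of the results path construction above (bucket and prefix
# values are hypothetical): posixpath.join guarantees '/' separators
# regardless of the host OS, which is why it is used over os.path.join.
def _example_results_path():
    import posixpath
    from datetime import datetime
    return posixpath.join(
        's3://athena-results-bucket', 'athena_partition_refresh',
        datetime.utcnow().strftime('%Y/%m/%d'))
    # e.g. 's3://athena-results-bucket/athena_partition_refresh/2019/01/02'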
def __init__(self):
    queue_url = os.environ.get('SQS_QUEUE_URL', '')
    if not queue_url:
        raise SQSClientError('No queue URL found in environment variables')

    # Only recreate the queue resource if it's not already cached
    SQSClient._queue = (
        SQSClient._queue or
        boto3.resource('sqs', config=boto.default_config()).Queue(queue_url)
    )
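# Sketch of the class-level caching behavior above (the queue URL is
# hypothetical): repeated instantiation reuses the same boto3 Queue resource
# instead of recreating it for every SQSClient instance.
def _example_cached_queue():
    import os
    os.environ['SQS_QUEUE_URL'] = (
        'https://sqs.us-east-1.amazonaws.com/123456789012/example-queue')
    SQSClient()
    SQSClient()  # No new resource created; SQSClient._queue is reused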
def __init__(self, firehose_config=None, log_sources=None):
    self._client = boto3.client('firehose', config=boto_helpers.default_config())

    # Create a dictionary to hold parsed payloads by log type.
    # Firehose needs this information to send to its corresponding
    # delivery stream.
    self._categorized_records = defaultdict(list)

    self.load_enabled_log_sources(firehose_config, log_sources, force_load=True)
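# Sketch of how the defaultdict(list) above groups records by log type
# (hypothetical records; not the original dispatch logic): each log type maps
# to the list of records bound for its delivery stream.
def _example_categorized_records():
    from collections import defaultdict
    categorized = defaultdict(list)
    for log_type, record in [('cloudwatch', {'id': 1}), ('osquery', {'id': 2}),
                             ('cloudwatch', {'id': 3})]:
        categorized[log_type].append(record)
    return categorized  # {'cloudwatch': [{'id': 1}, {'id': 3}], 'osquery': [{'id': 2}]}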
@classmethod
def _download_s3_objects(cls, buckets_info):
    """Download S3 files (json format) from S3 buckets into memory.

    Args:
        buckets_info (dict): Maps each bucket name to the list of JSON file
            keys to be downloaded from it.

    Each file name (without extension) becomes a lookup table name, and the
    parsed JSON content is stored as that table's value in cls._tables.
    """
    # The buckets info only gets passed if the tables need to be refreshed
    if not buckets_info:
        return  # Nothing to do

    # Explicitly set a timeout for the S3 connection. The boto default timeout is 60 seconds.
    boto_config = boto_helpers.default_config(timeout=10)
    s3_client = boto3.resource('s3', config=boto_config)

    for bucket, files in buckets_info.items():
        for json_file in files:
            try:
                start_time = time.time()
                s3_object = s3_client.Object(bucket, json_file).get()
                size_kb = round(s3_object.get('ContentLength') / 1024.0, 2)
                size_mb = round(size_kb / 1024.0, 2)
                display_size = '{}MB'.format(size_mb) if size_mb else '{}KB'.format(size_kb)
                LOGGER.info(
                    'Downloaded S3 file size %s and updated lookup table %s',
                    display_size, json_file)
                data = s3_object.get('Body').read()
            except ClientError as err:
                LOGGER.error(
                    'Encountered error while downloading %s from %s: %s',
                    json_file, bucket, err.response['Error']['Message'])
                continue
            except (ConnectTimeoutError, ReadTimeoutError):
                # Catching ConnectTimeoutError and ReadTimeoutError from botocore
                LOGGER.exception('Reading %s from S3 timed out', json_file)
                continue

            # The lookup data can optionally be compressed, so try to decompress.
            # This will fall back and use the original data if decompression fails.
            try:
                data = zlib.decompress(data, 47)
            except zlib.error:
                LOGGER.debug('Data in \'%s\' is not compressed', json_file)

            table_name = os.path.splitext(json_file)[0]
            cls._tables[table_name] = json.loads(data)

            total_time = time.time() - start_time
            LOGGER.info('Downloaded S3 file in %s seconds', round(total_time, 2))
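# Sketch of the wbits=47 argument used above: 47 == 32 + 15, where the +32
# enables automatic zlib/gzip header detection and 15 is the maximum window
# size, so lookup files compressed with either format decompress transparently.
def _example_decompress():
    import zlib
    compressed = zlib.compress(b'{"table": "value"}')  # standard zlib framing
    return zlib.decompress(compressed, 47)  # b'{"table": "value"}'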
def head_bucket(bucket, region):
    """Determines if the given bucket exists with correct permissions

    See: https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketHEAD.html

    Args:
        bucket (str): AWS S3 bucket name
        region (str): AWS region

    Returns:
        bool: True on success

    Raises:
        ClientError: Raised when the bucket does not exist or access is denied
    """
    try:
        client = boto3.client('s3', config=default_config(region=region))
        client.head_bucket(Bucket=bucket)
        return True
    except ClientError:
        LOGGER.error('An error occurred during S3 HeadBucket')
        raise
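# Usage sketch (hypothetical names): HeadBucket raises ClientError both when a
# bucket is missing and when access is denied, so an existence check must
# catch the exception rather than inspect a return value.
def _example_bucket_exists(bucket, region):
    from botocore.exceptions import ClientError
    try:
        return head_bucket(bucket, region)
    except ClientError:
        return False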
def put_object(object_data, bucket, key, region):
    """Saves the given data into AWS S3

    Args:
        object_data (str): The raw object data to save
        bucket (str): AWS S3 bucket name
        key (str): AWS S3 key name
        region (str): AWS region

    Returns:
        bool: True on success

    Raises:
        ClientError
    """
    try:
        client = boto3.client('s3', config=default_config(region=region))
        client.put_object(Body=object_data, Bucket=bucket, Key=key)
        return True
    except ClientError:
        LOGGER.error('An error occurred during S3 PutObject')
        raise
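# Usage sketch (hypothetical bucket and key): serialize state as JSON before
# saving, since put_object expects the raw object data as a string.
def _example_put_object():
    import json
    return put_object(json.dumps({'status': 'ok'}), 'example-bucket',
                      'state/status.json', 'us-east-1')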
def decrypt(ciphertext, region):
    """Decrypts the given ciphertext using AWS KMS

    See: https://docs.aws.amazon.com/kms/latest/APIReference/API_Decrypt.html

    Args:
        ciphertext (str): The raw, encrypted data to be decrypted
        region (str): AWS region

    Returns:
        str: The decrypted plaintext

    Raises:
        ClientError
    """
    try:
        client = boto3.client('kms', config=default_config(region=region))
        response = client.decrypt(CiphertextBlob=ciphertext)
        return response['Plaintext']
    except ClientError:
        LOGGER.error('An error occurred during KMS decryption')
        raise
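# Round-trip sketch combining encrypt() and decrypt() above (the alias and
# region are hypothetical). KMS embeds the key ID in the ciphertext blob,
# which is why decrypt() takes no key alias.
def _example_kms_round_trip():
    ciphertext = encrypt('secret-value', 'us-east-1', 'example-streamalert-key')
    return decrypt(ciphertext, 'us-east-1')  # The original plaintext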
def create_bucket(bucket, region):
    """Creates the given S3 bucket

    See: https://docs.aws.amazon.com/cli/latest/reference/s3api/create-bucket.html

    Args:
        bucket (str): The name of the intended S3 bucket
        region (str): AWS region

    Returns:
        bool: True on success

    Raises:
        ClientError
    """
    try:
        client = boto3.client('s3', config=default_config(region=region))
        client.create_bucket(Bucket=bucket)
        return True
    except ClientError:
        LOGGER.error('An error occurred during S3 CreateBucket')
        raise
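# Usage sketch (hypothetical bucket name). Note that outside us-east-1 the S3
# API generally requires a CreateBucketConfiguration with a LocationConstraint,
# which the helper above does not set.
def _example_create_bucket():
    return create_bucket('example-streamalert-data', 'us-east-1')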