Example #1
0
    def download_fileobj(file_handle, bucket, key, region):
        """Fetch an S3 object into a file handle and hand back its contents.

        The object is streamed into ``file_handle``; the handle is then rewound
        to the beginning and read in full.

        Args:
            file_handle (File): A File-like object that receives the download
            bucket (str): AWS S3 bucket name
            key (str): AWS S3 key name
            region (str): AWS region

        Returns:
            The result of ``file_handle.read()`` after rewinding to offset 0

        Raises:
            ClientError: Logged and re-raised unchanged
        """
        try:
            boto_config = default_config(region=region)
            s3_client = boto3.client('s3', config=boto_config)
            s3_client.download_fileobj(bucket, key, file_handle)

            # Rewind so the read below covers the whole handle from the start
            file_handle.seek(0)
            payload = file_handle.read()
            return payload
        except ClientError:
            LOGGER.error('An error occurred during S3 DownloadFileobj')
            raise
Example #2
0
    def encrypt(plaintext_data, region, key_alias):
        """Encrypt plaintext with AWS KMS under the key named by an alias.

        See:
          https://docs.aws.amazon.com/kms/latest/APIReference/API_Encrypt.html

        Args:
            plaintext_data (str): The raw, unencrypted data to be encrypted
            region (str): AWS region
            key_alias (str): KMS Key Alias, without the leading 'alias/'

        Returns:
            The 'CiphertextBlob' value from the KMS Encrypt response

        Raises:
            ClientError: Logged and re-raised unchanged
        """
        try:
            # KMS addresses aliases as 'alias/<name>'
            aliased_key = 'alias/{}'.format(key_alias)
            boto_config = default_config(region=region)
            kms_client = boto3.client('kms', config=boto_config)
            result = kms_client.encrypt(KeyId=aliased_key,
                                        Plaintext=plaintext_data)
            return result['CiphertextBlob']
        except ClientError:
            LOGGER.error('An error occurred during KMS encryption')
            raise
Example #3
0
    def __init__(self,
                 database_name,
                 results_bucket,
                 results_prefix,
                 region=None):
        """Set up the Boto3 Athena client and the S3 location for query results

        Args:
            database_name (str): Athena database name where tables will be queried
            results_bucket (str): S3 bucket in which to store Athena results;
                the 's3://' scheme is added when it is missing
            results_prefix (str): S3 key prefix to prepend to results in the bucket
            region (str): Optional AWS region passed to the default boto config
        """
        boto_config = boto_helpers.default_config(region=region)
        self._client = boto3.client('athena', config=boto_config)
        self.database = database_name.strip()

        # Normalize the bucket so it always carries the required 's3://' prefix
        bucket_uri = results_bucket.strip()
        has_scheme = bucket_uri.startswith('s3://')
        if not has_scheme:
            bucket_uri = 's3://{}'.format(bucket_uri)

        # Results path is <bucket>/<prefix>/YYYY/MM/DD, using the current UTC date
        date_suffix = datetime.utcnow().strftime('%Y/%m/%d')
        self._s3_results_path = posixpath.join(
            bucket_uri, results_prefix, date_suffix)
Example #4
0
    def __init__(self):
        """Resolve the SQS queue URL and make sure the shared queue is cached

        The queue URL is read from the SQS_QUEUE_URL environment variable. The
        queue resource is stored on the class (SQSClient._queue) and is only
        created when no cached value is present.

        Raises:
            SQSClientError: If SQS_QUEUE_URL is unset or empty
        """
        sqs_url = os.environ.get('SQS_QUEUE_URL', '')
        if not sqs_url:
            raise SQSClientError('No queue URL found in environment variables')

        # Reuse the cached queue resource when there is one; otherwise build it
        cached_queue = SQSClient._queue
        if not cached_queue:
            sqs_resource = boto3.resource('sqs', config=boto.default_config())
            cached_queue = sqs_resource.Queue(sqs_url)
        SQSClient._queue = cached_queue
Example #5
0
    def __init__(self, firehose_config=None, log_sources=None):
        """Create the Firehose client, the record buckets, and load log sources

        Args:
            firehose_config: Passed through to load_enabled_log_sources
            log_sources: Passed through to load_enabled_log_sources
        """
        boto_config = boto_helpers.default_config()
        self._client = boto3.client('firehose', config=boto_config)

        # Parsed payloads are grouped by log type, since Firehose needs the
        # log type to pick the delivery stream each record is sent to.
        self._categorized_records = defaultdict(list)

        self.load_enabled_log_sources(
            firehose_config, log_sources, force_load=True)
Example #6
0
    def _download_s3_objects(cls, buckets_info):
        """Download S3 files (json format) from S3 buckets into memory.

        Returns:
            dict: A dictionary contains information loaded from S3. The file name
                will be the key, and value is file content in json format.
        """
        # The buckets info only gets passed if the table need refreshed
        if not buckets_info:
            return  # Nothing to do

        # Explicitly set timeout for S3 connection. The boto default timeout is 60 seconds.
        boto_config = boto_helpers.default_config(timeout=10)
        s3_client = boto3.resource('s3', config=boto_config)

        for bucket, files in buckets_info.iteritems():
            for json_file in files:
                try:
                    start_time = time.time()
                    s3_object = s3_client.Object(bucket, json_file).get()
                    size_kb = round(s3_object.get('ContentLength') / 1024.0, 2)
                    size_mb = round(size_kb / 1024.0, 2)
                    display_size = '{}MB'.format(
                        size_mb) if size_mb else '{}KB'.format(size_kb)
                    LOGGER.info(
                        'Downloaded S3 file size %s and updated lookup table %s',
                        display_size, json_file)

                    data = s3_object.get('Body').read()
                except ClientError as err:
                    LOGGER.error(
                        'Encounterred error while downloading %s from %s, %s',
                        json_file, bucket, err.response['Error']['Message'])
                    continue
                except (ConnectTimeoutError, ReadTimeoutError):
                    # Catching ConnectTimeoutError and ReadTimeoutError from botocore
                    LOGGER.exception('Reading %s from S3 timed out', json_file)
                    continue

                # The lookup data can optionally be compressed, so try to decompress
                # This will fall back and use the original data if decompression fails
                try:
                    data = zlib.decompress(data, 47)
                except zlib.error:
                    LOGGER.debug('Data in \'%s\' is not compressed', json_file)

                table_name = os.path.splitext(json_file)[0]
                cls._tables[table_name] = json.loads(data)

                total_time = time.time() - start_time
                LOGGER.info('Downloaded S3 file %s seconds',
                            round(total_time, 2))
Example #7
0
    def head_bucket(bucket, region):
        """Determines if given bucket exists with correct permissions.

        See:
            https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketHEAD.html

        Args:
            bucket (str): AWS S3 bucket name
            region (str): AWS Region

        Returns:
            bool: True on success

        Raises:
            ClientError: Raised when the HeadBucket call fails, e.g. the bucket
                does not exist or is denying permission to access. The error is
                logged and re-raised unchanged.
        """
        try:
            client = boto3.client('s3', config=default_config(region=region))
            client.head_bucket(Bucket=bucket)
        except ClientError:
            LOGGER.error('An error occurred during S3 HeadBucket')
            raise

        # Honor the documented contract (and match put_object/create_bucket):
        # reaching this point means the HeadBucket call did not raise.
        return True
Example #8
0
    def put_object(object_data, bucket, key, region):
        """Write the given data to an AWS S3 object

        Args:
            object_data (str): The raw object data to save
            bucket (str): AWS S3 bucket name
            key (str): AWS S3 key name
            region (str): AWS region

        Returns:
            bool: True on success

        Raises:
            ClientError: Logged and re-raised unchanged
        """
        try:
            boto_config = default_config(region=region)
            s3_client = boto3.client('s3', config=boto_config)
            s3_client.put_object(Body=object_data, Bucket=bucket, Key=key)
        except ClientError:
            LOGGER.error('An error occurred during S3 PutObject')
            raise

        return True
Example #9
0
    def decrypt(ciphertext, region):
        """Decrypt ciphertext with AWS KMS

        See:
          https://docs.aws.amazon.com/kms/latest/APIReference/API_Decrypt.html

        Args:
            ciphertext (str): The raw, encrypted data to be decrypted
            region (str): AWS region

        Returns:
            The 'Plaintext' value from the KMS Decrypt response

        Raises:
            ClientError: Logged and re-raised unchanged
        """
        try:
            boto_config = default_config(region=region)
            kms_client = boto3.client('kms', config=boto_config)
            result = kms_client.decrypt(CiphertextBlob=ciphertext)
            return result['Plaintext']
        except ClientError:
            LOGGER.error('An error occurred during KMS decryption')
            raise
Example #10
0
    def create_bucket(bucket, region):
        """Create an S3 bucket with the given name

        See:
            https://docs.aws.amazon.com/cli/latest/reference/s3api/create-bucket.html

        Args:
            bucket (str): The string name of the intended S3 bucket
            region (str): AWS Region

        Returns:
            bool: True on success

        Raises:
            ClientError: Logged and re-raised unchanged
        """
        try:
            boto_config = default_config(region=region)
            s3_client = boto3.client('s3', config=boto_config)
            s3_client.create_bucket(Bucket=bucket)
        except ClientError:
            LOGGER.error('An error occurred during S3 CreateBucket')
            raise

        return True
Example #11
0
 def __init__(self, firehose_config=None, log_sources=None):
     """Create the Firehose client and load the enabled log sources

     Args:
         firehose_config: Passed through to load_enabled_log_sources
         log_sources: Passed through to load_enabled_log_sources
     """
     boto_config = boto_helpers.default_config()
     self._client = boto3.client('firehose', config=boto_config)
     self.load_enabled_log_sources(
         firehose_config, log_sources, force_load=True)