Example #1
def cp_s3_file(s3_src_path, s3_root):
    """
    Copy an s3 file to an s3 location
    Keeps the original file name.
    Args:
        s3_src_path:
        s3_root:

    Returns:

    """
    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_root)
    filename = os.path.basename(s3_src_path)
    output_path = os.path.join(s3_path, filename)

    src_bucket, src_key = split_s3_url(s3_src_path)
    # print "Trying to copy from bucket {} key {} to bucket {} key {}".format(src_bucket, src_key, bucket, output_path)

    s3.Object(bucket, output_path).copy_from(
        CopySource={'Bucket': src_bucket, 'Key': src_key},
        ServerSideEncryption="AES256")
    return os.path.join("s3://", bucket, output_path)
Example #2
def s3_bucket_exists(bucket):
    """
    Code from Amazon docs for checking bucket existence.

    Args:
        bucket:

    Returns:
        booL: whether bucket exists

    """
    import botocore

    s3 = b3.resource('s3')
    exists = True
    try:
        s3.meta.client.head_bucket(Bucket=bucket)
    except botocore.exceptions.ClientError as e:
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            exists = False
        elif error_code == 403:
            # For buckets you can get a 403 (forbidden) instead of a 404 (not found):
            # if you have the s3:ListBucket permission on the bucket, Amazon S3 returns an
            # HTTP 404 ("no such key") error; if you don't have s3:ListBucket, it returns
            # an HTTP 403 ("access denied") error.
            _logger.info(
                "aws_s3: bucket {} raised a 403 (access forbidden), do you have ListBucket permission?"
                .format(bucket))
            exists = False
        else:
            raise
    return exists
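
A minimal usage sketch, assuming the module context above and a hypothetical bucket name:

if s3_bucket_exists("my-hypothetical-bucket"):
    _logger.info("bucket exists and is reachable")
else:
    _logger.info("bucket missing, or ListBucket permission denied")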
Example #3
def get_s3_file(s3_url, filename=None):
    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_url)
    if filename is None:
        filename = os.path.basename(s3_path)
    s3.Object(bucket, s3_path).download_file(filename)
    return filename
Example #4
def ls_s3_url_objects(s3_url):
    """
    Return aws boto3 ObjectSummary's

    Note: There is no current way in boto3 to do globs -- you filter on the client side.

    Returns:
        list:str: list of ObjectSummary's under this path
    """
    result = []

    if s3_url[-1] != '/':
        s3_url += '/'

    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_url)

    if not s3_bucket_exists(bucket):
        return result

    s3_b = s3.Bucket(bucket)
    for i in s3_b.objects.filter(Prefix=s3_path, MaxKeys=1024):
        result.append(i)
    if len(result) == 1024:
        _logger.warning(
            "ls_s3_url_objects: hit MaxKeys 1024 limit in result set.")

    return result
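
Because boto3 offers no server-side glob matching (as the note above says), callers filter the returned ObjectSummary keys themselves. A small client-side sketch using fnmatch; the bucket URL and pattern are hypothetical:

import fnmatch

# List everything under the prefix, then match key names locally.
summaries = ls_s3_url_objects("s3://my-hypothetical-bucket/data/")
csv_keys = [o.key for o in summaries if fnmatch.fnmatch(o.key, "*.csv")]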
Example #5
def delete_s3_dir(s3_url):
    s3 = b3.resource('s3')
    bucket_name, s3_path = split_s3_url(s3_url)
    bucket = s3.Bucket(bucket_name)
    objects_to_delete = [{'Key': obj.key} for obj in bucket.objects.filter(Prefix=s3_path)]
    # delete_objects errors out on an empty key list, so only call it if we found objects.
    if objects_to_delete:
        bucket.delete_objects(Delete={'Objects': objects_to_delete})
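
One more caveat: delete_objects accepts at most 1,000 keys per request, so prefixes with more objects need batching. A hedged variant under the same assumed module context (a sketch, not the original library's code):

def delete_s3_dir_batched(s3_url):
    # Delete all keys under the prefix in chunks of 1000, the per-request limit.
    s3 = b3.resource('s3')
    bucket_name, s3_path = split_s3_url(s3_url)
    bucket = s3.Bucket(bucket_name)
    keys = [{'Key': obj.key} for obj in bucket.objects.filter(Prefix=s3_path)]
    for i in range(0, len(keys), 1000):
        bucket.delete_objects(Delete={'Objects': keys[i:i + 1000]})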
Example #6
def delete_s3_file(s3_url):
    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_url)
    response = s3.Object(bucket, s3_path).delete()
    # print response
    # if response['DeleteMarker']:
    #    return True
    # else:
    #    return False
    # TODO: we're getting a different response than the docs say.
    return True
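
Regarding the TODO above: S3 only includes DeleteMarker in the response for versioned buckets, which likely explains the mismatch with the docs. If a success check is wanted, one option (a sketch, not necessarily what the original intended) is to inspect the HTTP status code in the response metadata; names are hypothetical:

obj = b3.resource('s3').Object('my-hypothetical-bucket', 'some/key')
response = obj.delete()
# S3 answers delete_object with HTTP 204 on success (even when the key did not exist).
deleted_ok = response['ResponseMetadata']['HTTPStatusCode'] in (200, 204)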
Example #7
def get_s3_key(bucket, key, filename=None):
    """

    Args:
        bucket:
        key:
        file_name:
        s3: A boto3.resource('s3')

    Returns:

    """

    #print "PID({}) START bkt[] key[{}] file[{}]".format(multiprocessing.current_process(),key,filename)

    dl_retry = 3

    s3 = b3.resource('s3')

    if filename is None:
        filename = os.path.basename(key)
    else:
        path = os.path.dirname(filename)
        if not os.path.exists(path):
            try:
                os.makedirs(path)
            except OSError as ose:
                # swallow the error -- the directory likely already exists
                _logger.warning("aws_s3.get_s3_key: {}".format(
                    os.strerror(ose.errno)))

    while dl_retry > 0:
        try:
            s3.Bucket(bucket).download_file(key, filename)
            dl_retry = -1
        except Exception as e:
            _logger.warning(
                "aws_s3.get_s3_key: retry count [{}] -- download_file raised exception {}"
                .format(dl_retry, e))
            dl_retry -= 1
            if dl_retry <= 0:
                _logger.warning(
                    "aws_s3.get_s3_key: failed to download file after 3 retries, last exception {}"
                    .format(e))
                raise

    #print "PID({}) STOP bkt[] key[{}] file[{}]".format(multiprocessing.current_process(),key,filename)

    return filename
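
The commented-out prints above suggest this was called from worker processes. A short, hedged usage sketch that fans downloads out over a process pool (bucket and keys are hypothetical; each worker builds its own boto3 resource inside get_s3_key, so nothing unpicklable crosses process boundaries):

from multiprocessing import Pool

keys = ['data/a.csv', 'data/b.csv', 'data/c.csv']  # hypothetical keys
with Pool(processes=4) as pool:
    local_files = pool.starmap(
        get_s3_key,
        [('my-hypothetical-bucket', k, os.path.join('downloads', os.path.basename(k)))
         for k in keys])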
Example #8
def put_s3_file(local_path, s3_root):
    """
    Put local file to location at s3_root.
    Keeps original file name.
    Args:
        local_path:
        s3_root:

    Returns:

    """
    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_root)
    filename = os.path.basename(local_path)
    s3.Object(bucket, os.path.join(s3_path, filename)).upload_file(
        local_path, ExtraArgs={"ServerSideEncryption": "AES256"})
    return filename
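
A brief usage sketch with hypothetical paths; note that, unlike cp_s3_file, this returns only the base filename rather than the destination URL:

uploaded = put_s3_file('/tmp/report.csv', 's3://my-hypothetical-bucket/reports')
# uploaded == 'report.csv'; the object lands at s3://my-hypothetical-bucket/reports/report.csv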
Example #9
def ls_s3_url_objects(s3_url):
    """
    Return aws boto3 ObjectSummary's

    Note: There is no current way in boto3 to do globs -- you filter on the client side.

    Returns:
        list:str: list of ObjectSummary's under this path
    """
    result = []

    if s3_url[-1] != '/':
        s3_url += '/'

    bucket, s3_path = split_s3_url(s3_url)

    #if not s3_bucket_exists(bucket):
    #    return result

    # Alternative using the list_objects_v2 paginator, kept for reference but disabled:
    #     client = b3.client('s3')
    #     paginator = client.get_paginator('list_objects_v2')
    #     # pass Delimiter='/' to group results, i.e., list only at this level
    #     page_iterator = paginator.paginate(Bucket=bucket, Prefix=s3_path)
    #     for page in page_iterator:
    #         result += [obj['Key'] for obj in page['Contents']]

    s3 = b3.resource('s3')
    try:
        s3_b = s3.Bucket(bucket)
        for i in s3_b.objects.filter(Prefix=s3_path, MaxKeys=1024):
            result.append(i)
        if len(result) == 1024:
            _logger.warning(
                "ls_s3_url_objects: hit MaxKeys 1024 limit in result set.")
    except Exception as e:
        _logger.error(
            "ls_s3_url_objects: failed with exception {}".format(e))
        raise

    return result
Example #10
def s3_path_exists(s3_url):
    """
    Given an entire path, does the key exist?

    If you're checking for partial key, make sure to end with '/'

    This is how you make "folders" in s3, you use a key ending with '/'
    e.g., s3://mybucket/onelevel/anotherdir/
    bucket = mybucket
    key = onelevel/anotherdir/ -- it's a zero size object.

    If checking for full path, you can end with thing itself.

    Args:
        s3_url:

    Returns:

    """
    import botocore

    s3 = b3.resource('s3')
    bucket, key = split_s3_url(s3_url)
    if key is None:
        return s3_bucket_exists(bucket)
    exists = True
    try:
        s3.Object(bucket, key).load()
    except botocore.exceptions.ClientError as e:
        error_code = int(e.response['Error']['Code'])
        _logger.info("Error code {}".format(error_code))
        if error_code == 404:
            exists = False
        else:
            raise

    return exists
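
As the docstring above notes, S3 "folders" are just zero-byte objects whose key ends with '/'. A hedged sketch of creating such a marker so that s3_path_exists on the prefix succeeds (bucket and key are hypothetical):

s3 = b3.resource('s3')
# Create the zero-byte "directory" marker; the trailing '/' in the key is what makes it a folder.
s3.Object('my-hypothetical-bucket', 'onelevel/anotherdir/').put(Body=b'')
s3_path_exists('s3://my-hypothetical-bucket/onelevel/anotherdir/')  # now returns True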
Example #11
def get_s3_resource():
    return b3.resource('s3')