Example #1
0
def list_objects(s3_prefix, client=None, max_objects=MAX_LIST_OBJECTS):
    """Yield the full s3 path of each object listed under `s3_prefix`.

    This is a generator: nothing (including the verbose log line) runs
    until the first item is requested.

    Parameters
    ----------
    s3_prefix : str
        Full s3 path made of bucket and key prefix to list,
        e.g. s3://hstdp/messages/dataset-processed
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple calls.  It is
        handed to `_s3_setup` together with `s3_prefix`.
    max_objects : int
        Upper bound on listed objects; forwarded as the paginator's
        `MaxItems` setting.

    Yields
    ------
    full_s3_object_path : str
        "s3://<bucket>/<key>" for every listed entry whose key is non-empty.

    NOTE(review): earlier documentation stated that `s3_prefix` itself is
    excluded from the results; the only filter applied here is the
    non-empty key check -- confirm the intended behavior.
    """
    log.verbose("s3.list_objects", s3_prefix, max_objects)
    client, bucket_name, prefix = _s3_setup(client, s3_prefix)
    pages = client.get_paginator("list_objects_v2").paginate(
        Bucket=bucket_name,
        Prefix=prefix,
        PaginationConfig={"MaxItems": max_objects, "PageSize": 1000},
    )
    # Every yielded path shares the same scheme + bucket lead-in.
    base = "s3://" + bucket_name + "/"
    for page in pages:
        # A page without matches carries no "Contents" entry at all.
        for entry in page.get("Contents", []):
            key = entry["Key"]
            if not key:
                continue
            yield base + key
Example #2
0
def get_object(s3_filepath, client=None, encoding="utf-8"):
    """Fetch the s3 object at `s3_filepath` and return its contents.

    Parameters
    ----------
    s3_filepath : str
        Full s3 path to the object to fetch, including the bucket prefix,
        e.g. s3://hstdp/batch-1-2020-06-11T19-35-51/acs/j8cb010b0/process.txt
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.
    encoding : str
        Encoding used to decode the object contents.  Default 'utf-8'.
        Pass a falsy value (e.g. None) to get the raw bytes back.

    Returns
    -------
    object contents : str or bytes
        Decoded text when `encoding` is truthy, otherwise the undecoded
        bytes read from the response body.
    """
    log.verbose("s3.get_object", s3_filepath)
    client, bucket_name, object_name = _s3_setup(client, s3_filepath)
    response = client.get_object(Bucket=bucket_name, Key=object_name)
    # The whole body is read into memory in one call.
    binary = response["Body"].read()
    if encoding:
        return binary.decode(encoding)
    return binary
Example #3
0
def download_objects(dirpath, s3_dirpath, max_objects=1000, client=None):
    """Given `s3_dirpath` s3 directory to download, copy it to a local file
    system at `dirpath`.

    Parameters
    ----------
    dirpath : str
        Local filesystem path where s3 directory will be copied to.
        e.g. /outputs/batch-1-2020-06-11T19-35-51/acs/j8cb010b0
    s3_dirpath : str
        Full s3 path to directory to download,  including the bucket prefix,
        e.g. s3://hstdp/batch-1-2020-06-11T19-35-51/data/acs/j8cb010b0
    max_objects : int
        Max number of files to list and download.
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.

    Returns
    -------
    downloads : list (str)
        Absolute local file paths of the downloaded files, in listing order.
    """
    log.verbose("s3.download_objects", dirpath, s3_dirpath, max_objects)
    client = client or get_default_client()
    downloads = []
    for s3_filepath in list_objects(s3_dirpath,
                                    max_objects=max_objects,
                                    client=client):
        # Map the s3 path onto the local tree.  Only the first occurrence of
        # `s3_dirpath` is swapped so that a key which happens to repeat that
        # text further along is not rewritten a second time.
        local_filepath = os.path.abspath(
            s3_filepath.replace(s3_dirpath, dirpath, 1))
        download_filepath(local_filepath, s3_filepath, client)
        downloads.append(local_filepath)
    return downloads
Example #4
0
def copy_object(s3_filepath_from, s3_filepath_to, client=None):
    """Copy the s3 object at `s3_filepath_from` to `s3_filepath_to`.

    Parameters
    ----------
    s3_filepath_from : str
        s3 source object path.
        e.g. s3://hstdp/batch-1-2020-06-11T19-35-51/acs/j8cb010b0/process.txt
    s3_filepath_to : str
        s3 destination object path.
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.

    Returns
    -------
    Whatever the client's `copy_object` call returns.
    """
    log.verbose("s3.copy_object", s3_filepath_from, s3_filepath_to)
    if not client:
        client = get_default_client()
    # Source is split first, then destination.
    src_bucket, src_key = s3_split_path(s3_filepath_from)
    dst_bucket, dst_key = s3_split_path(s3_filepath_to)
    source = {"Bucket": src_bucket, "Key": src_key}
    return client.copy_object(Bucket=dst_bucket, Key=dst_key, CopySource=source)
Example #5
0
def put_object(string, s3_filepath, encoding="utf-8", client=None):
    """Upload `string` as the contents of the s3 object at `s3_filepath`.

    Parameters
    ----------
    string : str or bytes
        Data to upload.  Its length is logged before the upload.
    s3_filepath : str
        Full s3 path of the destination, naming both bucket and object key.
    encoding : str
        When truthy, `string` is encoded with it before upload; pass a
        falsy value to upload `string` unchanged.  Default 'utf-8'.
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.

    Returns
    -------
    None
    """
    log.verbose("s3.put_object", s3_filepath, "length", len(string))
    s3_client, bucket, key = _s3_setup(client, s3_filepath)
    payload = string.encode(encoding) if encoding else string
    s3_client.put_object(Body=payload, Bucket=bucket, Key=key)
Example #6
0
def delete_object(s3_filepath, client=None):
    """Delete the s3 object located at `s3_filepath`.

    Parameters
    ----------
    s3_filepath : str
        Full s3 path to object to delete,  including the bucket prefix,
        e.g. s3://hstdp/batch-1-2020-06-11T19-35-51/acs/j8cb010b0/process.txt
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.

    Returns
    -------
    Whatever the client's `delete_object` call returns.
    """
    log.verbose("s3.delete_object", s3_filepath)
    s3_client, bucket, key = _s3_setup(client, s3_filepath)
    outcome = s3_client.delete_object(Bucket=bucket, Key=key)
    return outcome
Example #7
0
def parse_s3_event(event):
    """Pull the bucket and trailing key field out of an S3 `event` message.

    Only the first entry of event["Records"] is examined.

    See S3 docs: https://docs.aws.amazon.com/AmazonS3/latest/userguide/notification-content-structure.html
    See also the callers of this function.

    Parameters
    ----------
    event : dict
        S3 notification carrying Records[0].s3.object.key and
        Records[0].s3.bucket.name.

    Returns
    -------
    (bucket_path, ipppssoot) : tuple (str, str)
        "s3://" + bucket name,  and the portion of the object key which
        follows its last "-" (the whole key if it contains no "-").
    """
    log.verbose("S3 Event:", event)

    s3_record = event["Records"][0]["s3"]
    message = s3_record["object"]["key"]
    bucket_name = s3_record["bucket"]["name"]
    # Third element of rpartition is everything after the last "-".
    ipst = message.rpartition("-")[2]

    log.info(f"received {message} : bucket = {bucket_name}, ipppssoot = {ipst}")

    bucket_path = "s3://" + bucket_name
    return bucket_path, ipst
Example #8
0
def download_filepath(filepath, s3_filepath, client=None):
    """Given `filepath` to download, copy s3 object at `s3_filepath` to it.

    Any missing parent directories of `filepath` are created first.

    Parameters
    ----------
    filepath : str
        Local filesystem path to file to download, including filename.
        A bare filename (no directory part) is written relative to the
        current working directory.
    s3_filepath : str
        Full s3 path to object to download,  including the bucket prefix,
        e.g. s3://hstdp/batch-1-2020-06-11T19-35-51/acs/j8cb010b0/process.txt
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.

    Returns
    -------
    Whatever the client's `download_file` call returns.
    """
    log.verbose("s3.download_filepath:", filepath, s3_filepath)
    client, bucket_name, object_name = _s3_setup(client, s3_filepath)
    parent_dir = os.path.dirname(filepath)
    # dirname is "" for a bare filename and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    return client.download_file(bucket_name, object_name, filepath)
Example #9
0
def move_object(s3_filepath_from, s3_filepath_to, client=None):
    """Move an s3 object by copying `s3_filepath_from` to `s3_filepath_to`
    and then deleting the source.

    The copy and the delete are two separate calls;  the delete is only
    attempted once the copy call has returned.

    Parameters
    ----------
    s3_filepath_from : str
        s3 source object path.
        e.g. s3://hstdp/batch-1-2020-06-11T19-35-51/acs/j8cb010b0/process.txt
    s3_filepath_to : str
        s3 destination object path.
    client : get_default_client()
        Optional boto3 s3 client to re-use for multiple files.

    Returns
    -------
    None
    """
    log.verbose("s3.move_object", s3_filepath_from, s3_filepath_to)
    if not client:
        client = get_default_client()
    # Share one client across both steps.
    copy_object(s3_filepath_from, s3_filepath_to, client)
    delete_object(s3_filepath_from, client)