# Example no. 1
0
"""
Helper method for boto3 operations.
"""
import boto3
from runcmd import get_item_from_dict
from runcmd.logging_helper import get_logger

# Logger for this module, obtained from the project's logging helper.
LOG = get_logger(__name__)

# NOTE(review): not referenced in the visible part of this file; presumably
# the region used when a caller does not supply one - confirm.
DEFAULT_REGION = "ap-southeast-2"

# Module-level cache dict; entries are stored under the string produced by
# generate_key(resource_type, service, region).
BOTO_ELEMENTS = {}

def generate_key(resource_type, service, region):
    """
    Builds the cache-dict key for a resource.

    The three arguments are converted to strings and joined with hyphens,
    in the order resource_type, service, region.
    """
    key_parts = (resource_type, service, region)
    return "-".join(map(str, key_parts))

def get_from_cache(resource_type, service, region):
    """
    Retrieves an element from the cache.

    The lookup key is built by generate_key from resource_type, service and
    region, and the lookup in BOTO_ELEMENTS is delegated to
    get_item_from_dict, whose result is returned as-is.
    NOTE(review): presumably that helper yields None when the key is
    missing - confirm against runcmd.get_item_from_dict.
    """
    cache_key = generate_key(resource_type, service, region)
    return get_item_from_dict(BOTO_ELEMENTS, cache_key)

def add_to_cache(resource_type, service, region, element):
    """
    Stores element in the cache.

    The entry is written to BOTO_ELEMENTS under the key that generate_key
    builds from resource_type, service and region; an existing entry with
    the same key is overwritten.
    """
    cache_key = generate_key(resource_type, service, region)
    BOTO_ELEMENTS[cache_key] = element
# Example no. 2
0
"""
Converts a glue table to a csv file
"""
from runcmd import generate_args, spark as S
from runcmd.logging_helper import get_logger

# Name of this job; used as the logger name here and passed to
# S.create_spark_session in main().
JOB_NAME = "table_to_csv"
LOGGER = get_logger(JOB_NAME)


def main():
    """
    Entry point for Spark Driver Execution.

    Reads --bucket, --prefix, --glue-table and --checkpoint-dir through
    generate_args, creates a Spark session named JOB_NAME, selects every row
    of the requested table and hands the resulting dataframe to
    S.write_to_csv together with the destination bucket and prefix.
    """
    args = generate_args({
        "--bucket": "The destination bucket",
        "--prefix": "The destination prefix",
        "--glue-table": "The name of the table to export",
        "--checkpoint-dir": "The checkpoint directory",
    })
    spark = S.create_spark_session(JOB_NAME,
                                   checkpoint_dir=args.checkpoint_dir)
    # Only used for the log line below; the writer receives bucket and prefix
    # separately.
    s3_path = "s3://%s/%s" % (args.bucket, args.prefix)
    # Values are passed as logging arguments so the message is only formatted
    # when the record is actually emitted.
    LOGGER.info("Writing table %s to path %s.", args.glue_table, s3_path)
    dataframe = spark.sql("select * from %s" % args.glue_table)
    S.write_to_csv(dataframe, args.bucket, args.prefix)
    LOGGER.info("Operation Completed.")
"""
Process to remove S3 object tags from a prefix.
"""
from runcmd import generate_args, s3 as S3
from runcmd.logging_helper import get_logger

# Logger for this module, obtained from the project's logging helper.
LOGGER = get_logger(__name__)


def main():
    """
    Entry point: removes the S3 object tags from every item under a prefix.

    Reads --bucket and --prefix through generate_args, lists the matching
    items with S3.list_items_with_prefix and, for each one, logs its s3://
    path and then calls S3.remove_all_tags on it.
    """
    args = generate_args({
        "--bucket": "The input bucket of the files to process",
        "--prefix": "The input prefix where those files live",
    })
    for item in S3.list_items_with_prefix(args.bucket, args.prefix):
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted.
        LOGGER.info("Removing tags from s3://%s/%s",
                    item.bucket_name, item.key)
        S3.remove_all_tags(item.bucket_name, item.key)