"""
Helper methods for boto3 operations.

Keeps a module-level cache of elements keyed by resource type, service
and region.
"""
import boto3

from runcmd import get_item_from_dict
from runcmd.logging_helper import get_logger

LOG = get_logger(__name__)

DEFAULT_REGION = "ap-southeast-2"

# Module-level cache: maps the string produced by generate_key() to the
# element stored through add_to_cache().
BOTO_ELEMENTS = {}


def generate_key(resource_type, service, region):
    """
    Generates the key for the resources to store in the cache dict.

    The three parts are joined with dashes, in the order
    resource_type, service, region.
    """
    return "%s-%s-%s" % (resource_type, service, region)


def get_from_cache(resource_type, service, region):
    """
    Retrieves an element from the cache.

    The lookup is delegated to get_item_from_dict, so the result for a key
    that has not been cached is whatever that helper returns.
    """
    return get_item_from_dict(BOTO_ELEMENTS, generate_key(resource_type, service, region))


def add_to_cache(resource_type, service, region, element):
    """
    Adds an element to the cache.

    Any element already stored under the same resource type, service and
    region is overwritten.
    """
    BOTO_ELEMENTS[generate_key(resource_type, service, region)] = element
"""
Converts a glue table to a csv file
"""
from runcmd import generate_args, spark as S
from runcmd.logging_helper import get_logger

JOB_NAME = "table_to_csv"
LOGGER = get_logger(JOB_NAME)


def main():
    """
    Job entry point.

    Parses the command line arguments, selects every row of the requested
    glue table through a Spark session and hands the resulting dataframe to
    S.write_to_csv together with the destination bucket and prefix.
    """
    cli_options = {
        "--bucket": "The destination bucket",
        "--prefix": "The destination prefix",
        "--glue-table": "The name of the table to export",
        "--checkpoint-dir": "The checkpoint directory",
    }
    args = generate_args(cli_options)

    session = S.create_spark_session(JOB_NAME, checkpoint_dir=args.checkpoint_dir)

    bucket, prefix = args.bucket, args.prefix
    table_name = args.glue_table

    # The s3 path is only used for the log message; the writer receives the
    # bucket and prefix separately.
    destination = "s3://%s/%s" % (bucket, prefix)
    LOGGER.info("Writing table %s to path %s." % (table_name, destination))

    query = "select * from %s" % table_name
    rows = session.sql(query)
    S.write_to_csv(rows, bucket, prefix)

    LOGGER.info("Operation Completed.")
"""
Process to remove S3 object tags from a prefix.
"""
from runcmd import generate_args, s3 as S3
from runcmd.logging_helper import get_logger

LOGGER = get_logger(__name__)


def main():
    """
    Job entry point.

    Parses the bucket and prefix from the command line, lists the items
    found under that prefix and removes all tags from each one, logging
    every item as it is processed.
    """
    cli_options = {
        "--bucket": "The input bucket of the files to process",
        "--prefix": "The input prefix where those files live",
    }
    args = generate_args(cli_options)

    for s3_object in S3.list_items_with_prefix(args.bucket, args.prefix):
        object_bucket = s3_object.bucket_name
        object_key = s3_object.key
        LOGGER.info("Removing tags from s3://%s/%s" % (object_bucket, object_key))
        S3.remove_all_tags(object_bucket, object_key)