Example #1
0
def purge():
    """Purge S3 contents.

    Builds an ``S3Utils`` helper from ``Config.ch``, probes its bucket with
    ``head_bucket`` and, when that probe raises ``ClientError``, issues
    ``create_bucket`` for the same name. Finally calls ``delete_prefix("/")``
    on the helper (presumably wiping everything in the bucket -- confirm
    against ``S3Utils.delete_prefix``).
    """
    s3_utils = S3Utils(Config.ch)
    target_bucket = s3_utils.bucket

    try:
        Config.ch.s3_client.head_bucket(Bucket=target_bucket)
    except ClientError:
        # The probe failed: fall back to creating the bucket.
        Config.ch.s3_client.create_bucket(Bucket=target_bucket)

    s3_utils.delete_prefix("/")
Example #2
0
 def __init__(self,
              load_archive_base_prefix: str,
              bucket_name: str = Config.s3_bucket,
              **ignored_kwargs):
     """Set up the document load manager.

     Creates the S3 helper for ``bucket_name``, stores the archive prefix
     after passing it through ``S3Utils.format_as_prefix`` and calls
     ``init_dbs()`` on ``Config.connection_helper``.

     :param load_archive_base_prefix: base prefix for doc ingest archive
     :param bucket_name: s3 bucket name (defaults to ``Config.s3_bucket``)
     :param ignored_kwargs: extra keyword arguments; accepted but not used here
     """
     conn_helper = Config.connection_helper
     s3_utils = S3Utils(ch=conn_helper, bucket=bucket_name)

     self.s3u = s3_utils
     self.load_archive_base_prefix = s3_utils.format_as_prefix(
         load_archive_base_prefix)

     conn_helper.init_dbs()
Example #3
0
 def __init__(self,
              db_backup_base_prefix: str,
              bucket_name: str = Config.s3_bucket,
              **ignored_kwargs):
     """Set up the core orch/web table tools.

     Keeps the bucket name and ``Config.connection_helper`` on the instance,
     builds an ``S3Utils`` helper from both, stores the backup prefix after
     passing it through ``S3Utils.format_as_prefix`` and finally calls
     ``init_dbs()`` on the connection helper.

     :param db_backup_base_prefix: S3 base prefix for storing/restoring db backup
     :param bucket_name: S3 bucket name (defaults to ``Config.s3_bucket``)
     :param ignored_kwargs: extra keyword arguments; accepted but not used here
     """
     conn_helper = Config.connection_helper
     s3_utils = S3Utils(ch=conn_helper, bucket=bucket_name)

     self.bucket_name = bucket_name
     self.ch = conn_helper
     self.s3u = s3_utils
     self.db_backup_base_prefix = s3_utils.format_as_prefix(
         db_backup_base_prefix)

     conn_helper.init_dbs()
Example #4
0
 def __init__(self,
              current_doc_snapshot_prefix: str,
              backup_doc_snapshot_prefix: str,
              bucket_name: str = Config.s3_bucket,
              **ignored_kwargs):
     """Utils for managing raw/parsed data snapshots

     Builds the S3 helper for ``bucket_name``, stores both snapshot prefixes
     after passing them through ``S3Utils.format_as_prefix``, calls
     ``init_dbs()`` on ``Config.connection_helper`` and exposes the ``shutil``
     module as ``self.shellu``.

     :param current_doc_snapshot_prefix: S3 prefix to where raw/parsed doc prefixes are located
     :param backup_doc_snapshot_prefix: S3 prefix to where backup raw/parsed doc prefixes are located
     :param bucket_name: S3 bucket name (defaults to ``Config.s3_bucket``)
     :param ignored_kwargs: extra keyword arguments; accepted but not used here
     """
     self.bucket_name = bucket_name
     # Use the caller-supplied bucket rather than always Config.s3_bucket, so
     # that self.s3u and self.bucket_name cannot point at different buckets.
     self.s3u = S3Utils(ch=Config.connection_helper,
                        bucket=self.bucket_name)
     self.current_doc_snapshot_prefix = self.s3u.format_as_prefix(
         current_doc_snapshot_prefix)
     self.backup_doc_snapshot_prefix = self.s3u.format_as_prefix(
         backup_doc_snapshot_prefix)
     Config.connection_helper.init_dbs()
     # The shutil module itself is kept as the instance's shell/file utility.
     self.shellu = shutil
Example #5
0
 def __init__(self,
              checkpoint_file_path: str,
              checkpointed_dir_path: str,
              bucket_name: str = Config.s3_bucket,
              checkpoint_ready_marker: t.Optional[str] = None,
              **ignored_kwargs):
     """Utility class for dealing with checkpointed paths in S3

     The checkpointed directory path is stored with a trailing ``/``; one is
     appended when the given value does not already end with it.

     :param checkpoint_file_path: Path to checkpoint file in s3 (sans bucket name)
     :param checkpointed_dir_path: Path to checkpointed base path in s3 (sans bucket name)
     :param bucket_name: S3 Bucket name (defaults to ``Config.s3_bucket``)
     :param checkpoint_ready_marker: Name of the file that marks checkpointed directory
         as ready for processing; stored as ``self.ready_marker``
     :param ignored_kwargs: extra keyword arguments; accepted but not used here
     """
     dir_path = checkpointed_dir_path
     if not dir_path.endswith('/'):
         dir_path = dir_path + '/'

     self.checkpoint_file_path = checkpoint_file_path
     self.checkpointed_dir_path = dir_path
     self.bucket_name = bucket_name
     self.s3u = S3Utils(Config.connection_helper, bucket=bucket_name)
     self.ready_marker = checkpoint_ready_marker
Example #6
0
def reset():
    """Init S3 buckets and prime them with test files.

    Steps, in order:

    1. Build an ``S3Utils`` helper from ``Config.ch`` and probe its bucket
       with ``head_bucket``; on ``ClientError`` call ``create_bucket``.
    2. Call ``delete_prefix("/")`` on the helper.
    3. For each immediate entry under ``Config.CRAWLER_OUTPUT_PATH`` and then
       ``Config.PARSED_OUTPUT_PATH``, print the root path and upload the
       entry with ``upload_dir`` under a fixed prefix plus the entry's name.
    """
    s3_utils = S3Utils(Config.ch)
    target_bucket = s3_utils.bucket

    try:
        Config.ch.s3_client.head_bucket(Bucket=target_bucket)
    except ClientError:
        # The probe failed: fall back to creating the bucket.
        Config.ch.s3_client.create_bucket(Bucket=target_bucket)

    s3_utils.delete_prefix("/")

    def _upload_children(root, base_prefix):
        # Print the local root, then upload each of its direct children.
        print(root)
        for child in Path(root).iterdir():
            s3_utils.upload_dir(
                local_dir=child.resolve(),
                prefix_path=base_prefix + child.name)

    _upload_children(
        Config.CRAWLER_OUTPUT_PATH,
        'gamechanger/external-uploads/crawler-downloader/')
    _upload_children(
        Config.PARSED_OUTPUT_PATH,
        'gamechanger/external-uploads/parsed-crawler-downloader/')
Example #7
0
class Conf:
    """Shared handles created once, when this class body is executed."""

    # Connection helper obtained from get_connection_helper_from_env().
    ch = get_connection_helper_from_env()
    # S3 helper built on that connection helper; no bucket is passed here, so
    # S3Utils picks whatever bucket it defaults to.
    s3_utils = S3Utils(ch)
Example #8
0
def peek():
    """Peek parts of S3 hierarchy.

    Prints, one per line, every item yielded by
    ``S3Utils.iter_object_paths_at_prefix('/')`` for the helper built from
    ``Config.ch``.
    """
    s3_utils = S3Utils(Config.ch)
    object_paths = s3_utils.iter_object_paths_at_prefix('/')

    for object_path in object_paths:
        print(object_path)