Example #1
def test_s3_python_object():

    my_dict_a = {"a": 1}
    my_dict_b = {"b": 2}
    my_list = [1, 2, 3]
    my_complex_dict = {
        "1": 2,
        "my_list": [0, 9],
        "my_dict": {
            "z": -1,
            "w": -2
        }
    }

    s3_key = "my_object"
    s3_access = S3Access(profile_name=test_awsimple_str,
                         bucket_name=test_awsimple_str)

    for my_object in (my_dict_a, my_dict_b, my_list, my_complex_dict):
        s3_access.upload_object_as_json(my_object, s3_key)

        my_dict_from_s3 = s3_access.download_object_as_json(s3_key)
        assert my_object == my_dict_from_s3

        my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key)
        assert my_object == my_dict_from_s3
        my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key)  # this will be the cached version
        assert my_object == my_dict_from_s3
        assert s3_access.download_status.cache_hit
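
These tests reference shared names like `test_awsimple_str`, `temp_dir`, and `cache_dir` that are defined elsewhere in the test suite. A minimal sketch of what that shared module might contain (the values here are illustrative assumptions, not from the source):

# hypothetical shared test constants assumed by the examples on this page
from pathlib import Path

test_awsimple_str = "testawsimple"  # assumed: doubles as both the profile name and the bucket name
temp_dir = Path("temp")  # assumed scratch directory for test files
cache_dir = Path(temp_dir, "cache")  # assumed download cache location
temp_dir.mkdir(parents=True, exist_ok=True)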
Example #2
def test_s3_string():
    s3_access = S3Access(test_awsimple_str)
    s3_access.write_string(test_awsimple_str, test_awsimple_str)
    d = s3_access.dir()
    metadata = d[test_awsimple_str]
    assert metadata.size == len(test_awsimple_str)
    assert metadata.key == test_awsimple_str  # the contents are the same as the key
    # https://passwordsgenerator.net/sha512-hash-generator/
    assert metadata.sha512.lower() == "D16764F12E4D13555A88372CFE702EF8AE07F24A3FFCEDE6E1CDC8B7BFC2B18EC3468A7752A09F100C9F24EA2BC77566A08972019FC04CF75AB3A64B475BDFA3".lower()
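
Rather than trusting an online generator, the expected digest can be computed locally with the standard library; a sketch (hashlib's hexdigest() is already lowercase, so no further normalization is needed):

import hashlib

# the object contents equal the key here (see the assert above), so hash test_awsimple_str itself
expected_sha512 = hashlib.sha512(test_awsimple_str.encode("utf-8")).hexdigest()
assert metadata.sha512.lower() == expected_sha512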
Example #3
def test_aws_access():

    # test basic AWS access

    bucket_name = __application_name__
    s3_key = "test.txt"
    s3_value = "hi"

    s3_access = S3Access(bucket_name)
    s3_access.create_bucket()  # when mocking, the bucket always has to be created first
    s3_access.write_string(s3_value, s3_key)
    assert s3_access.read_string(s3_key) == s3_value
Example #4
def test_s3_object_does_not_exist():

    i_do_not_exist_key = "i-do-not-exist"

    s3_access = S3Access(
        profile_name=test_awsimple_str,
        bucket_name=test_awsimple_str)  # keyword parameter for bucket_name
    assert s3_access.bucket_exists()  # make sure the bucket exists
    with pytest.raises(s3_access.client.exceptions.NoSuchKey):
        s3_access.read_string(i_do_not_exist_key)

    with pytest.raises(AWSimpleException):
        s3_access.get_s3_object_metadata(i_do_not_exist_key)
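
Instead of catching the exception, the object's existence can be checked up front with `object_exists` (the same method Example 5 uses); a minimal sketch:

# probe for the key first rather than catching NoSuchKey
if s3_access.object_exists(i_do_not_exist_key):
    print(s3_access.read_string(i_do_not_exist_key))
else:
    print(f"{i_do_not_exist_key} not found")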
Example #5
def test_s3_upload():
    contents = "I am public readable"
    s3_access = S3Access(profile_name=test_awsimple_str,
                         bucket_name=test_awsimple_str)
    s3_access.set_public_readable(True)
    test_file_name = "public_readable.txt"
    test_file_path = Path(temp_dir, test_file_name)
    test_file_path.open("w").write(contents)
    assert s3_access.upload(test_file_path, test_file_name, force=True)
    time.sleep(3)
    assert s3_access.object_exists(test_file_name)

    # read from the URL to see if the contents are public readable
    metadata = s3_access.get_s3_object_metadata(test_file_name)
    object_contents = get(metadata.url).content.decode("utf-8")
    assert object_contents == contents
Example #6
def test_s3_dir():
    s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str)  # use non-keyword parameter for bucket_name

    # set up
    s3_access.create_bucket()  # may already exist
    test_file_name = "test.txt"
    test_file_path = Path(temp_dir, test_file_name)
    test_file_path.open("w").write("hello world")
    s3_access.upload(test_file_path, test_file_name)  # may already be in S3

    s3_dir = s3_access.dir()
    pprint(s3_dir)
    md = s3_dir[test_file_name]
    assert md.key == test_file_name
    assert md.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"  # "hello world"
Example #7
def test_aws_test():

    # test the test() method (basic AWS connection)

    # these should work
    if not is_mock():
        assert AWSAccess(profile_name=test_awsimple_str).test()
    assert S3Access(test_awsimple_str, profile_name=test_awsimple_str).test()
    assert DynamoDBAccess(test_awsimple_str,
                          profile_name=test_awsimple_str).test()
    assert SQSAccess(test_awsimple_str, profile_name=test_awsimple_str).test()

    if not is_mock():
        # this (non-existent) profile doesn't have access at all
        with pytest.raises(ProfileNotFound):
            AWSAccess(profile_name="IAmNotAProfile").test()
Example #8
def test_cache_eviction(s3_access):
    # force cache eviction
    cache_max = 100
    eviction_dir = Path(temp_dir, "eviction")
    eviction_cache = Path(eviction_dir, "cache")
    s3_access_cache_eviction = S3Access(profile_name=test_awsimple_str,
                                        bucket_name=test_awsimple_str,
                                        cache_dir=eviction_cache,
                                        cache_max_absolute=cache_max)
    size = 50
    rmtree(eviction_dir, ignore_errors=True)
    while size <= 2 * cache_max:
        file_name = f"t{size}.txt"
        source_file_path = Path(eviction_dir, "source", file_name)
        source_file_path.parent.mkdir(parents=True, exist_ok=True)

        # upload
        with source_file_path.open("w") as f:
            f.truncate(round(
                size))  # this quickly makes a (sparse) file filled with zeros
        s3_access_cache_eviction.upload(source_file_path, file_name)

        dest_path = Path(eviction_dir, "dest", file_name)

        # cold download
        status_cold = s3_access_cache_eviction.download_cached(file_name, dest_path)
        assert not status_cold.cache_hit
        if size <= cache_max:
            assert status_cold.cache_write

        # warm download
        assert dest_path.exists()
        status_warm = s3_access_cache_eviction.download_cached(file_name, dest_path)
        if size <= cache_max:
            assert status_warm.cache_hit
            assert not status_warm.cache_write
        assert dest_path.exists()

        # make sure cache stays within max size limit
        cache_size = get_directory_size(eviction_cache)
        print(f"{cache_size=}")
        assert cache_size <= cache_max  # make sure we stay within bounds

        size *= 2
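
`get_directory_size` is not defined in this example; a minimal sketch of such a helper, assuming it returns the total size in bytes of the files under a directory:

from pathlib import Path

def get_directory_size(directory: Path) -> int:
    # sum the sizes of all regular files below the directory, recursively
    return sum(p.stat().st_size for p in Path(directory).rglob("*") if p.is_file())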
Example #9
def test_s3_keys():
    s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str)  # use non-keyword parameter for bucket_name

    # set up
    s3_access.create_bucket()  # may already exist
    test_file_name = "test.txt"
    test_file_name_2 = "test2.txt"
    test_file_path = Path(temp_dir, test_file_name)
    test_file_path.open("w").write("hello world")
    s3_access.upload(test_file_path, test_file_name_2)  # may already be in S3
    s3_access.upload(test_file_path, test_file_name)  # may already be in S3

    s3_keys = s3_access.keys()
    pprint(s3_keys)
    # for real AWS I may have other objects in the test bucket
    assert test_file_name in s3_keys
    assert test_file_name_2 in s3_keys
Example #10
def test_s3_bucket():
    s3_access = S3Access(test_bucket_name, profile_name=test_awsimple_str)  # use non-keyword parameter for bucket_name
    s3_access.create_bucket()  # may already exist

    # wait for bucket to exist
    timeout_count = 100
    while not s3_access.bucket_exists() and timeout_count > 0:
        time.sleep(3)
        timeout_count -= 1

    assert s3_access.bucket_exists()

    assert not s3_access.create_bucket()  # already exists
    assert s3_access.delete_bucket()

    # wait for bucket to get deleted
    timeout_count = 100
    while s3_access.bucket_exists() and timeout_count > 0:
        time.sleep(3)  # wait for bucket to exist
        timeout_count -= 1

    assert not s3_access.bucket_exists()
    assert not s3_access.delete_bucket()  # was nothing to delete
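
The polling loops above can alternatively be written with boto3's built-in waiters on the underlying client (exposed as `s3_access.client`, as Example 4 shows); a sketch using the standard S3 waiter names:

# e.g. block until the bucket exists, instead of a hand-rolled sleep loop
s3_access.client.get_waiter("bucket_exists").wait(Bucket=test_bucket_name)
# ... and after delete_bucket():
s3_access.client.get_waiter("bucket_not_exists").wait(Bucket=test_bucket_name)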
Example #11
def test_s3_list_buckets():
    bucket_names = S3Access().bucket_list()
    pprint(bucket_names)
    assert test_awsimple_str in bucket_names
Example #12
    def run(self):

        preferences = get_preferences(self.ui_type)
        dry_run = preferences.dry_run

        backup_directory = os.path.join(preferences.backup_directory, "s3")

        os.makedirs(backup_directory, exist_ok=True)

        s3_access = S3Access(profile_name=preferences.aws_profile)

        decoding = "utf-8"

        # all whitespace is removed from the ls output below, so this pattern contains none
        ls_re = re.compile(r"TotalObjects:([0-9]+)TotalSize:([0-9]+)")

        buckets = s3_access.bucket_list()
        self.info_out(f"found {len(buckets)} buckets")

        count = 0
        exclusions_no_comments = ExclusionPreferences(BackupTypes.S3.name).get_no_comments()
        for bucket_name in buckets:

            # do the sync
            if bucket_name in exclusions_no_comments:
                self.info_out(f"excluding {bucket_name}")
            else:
                if dry_run:
                    self.info_out(f"dry run {bucket_name}")
                else:
                    self.info_out(f"{bucket_name}")

                # try to find the AWS CLI app
                paths = [
                    (Path("venv", "Scripts", "python.exe").absolute(),
                     Path("venv", "Scripts", "aws").absolute()),  # local venv
                    (Path("python.exe").absolute(),
                     Path("Scripts", "aws").absolute())  # installed app
                ]
                aws_cli_path = None
                python_path = None
                for p, a in paths:
                    if p.exists() and a.exists():
                        aws_cli_path = a
                        python_path = p
                        break

                if aws_cli_path is None:
                    log.error(f"AWS CLI executable not found ({paths=})")
                elif python_path is None:
                    log.error(f"Python executable not found ({paths=})")
                else:
                    aws_cli_path = f'"{str(aws_cli_path)}"'  # from Path to str, with quotes for installed app
                    # AWS CLI app also needs the python executable to be in the path if it's not in the same dir, which happens when this program is installed.
                    # Make the directory of our python.exe the first in the list so it's found and not any of the others that may or may not be in the PATH.
                    env_var = deepcopy(os.environ)
                    env_var["path"] = f"{str(python_path.parent)};{env_var.get('path', '')}"

                    destination = Path(backup_directory, bucket_name)
                    os.makedirs(destination, exist_ok=True)
                    s3_bucket_path = f"s3://{bucket_name}"
                    # Don't use --delete.  We want to keep 'old' files locally.
                    sync_command_line = [
                        aws_cli_path, "s3", "sync", s3_bucket_path,
                        str(destination.absolute())
                    ]
                    if dry_run:
                        sync_command_line.append("--dryrun")
                    sync_command_line_str = " ".join(sync_command_line)
                    log.info(sync_command_line_str)

                    try:
                        sync_result = subprocess.run(sync_command_line_str,
                                                     stdout=subprocess.PIPE,
                                                     shell=True,
                                                     env=env_var)
                    except FileNotFoundError as e:
                        self.error_out(f'error executing {" ".join(sync_command_line)} {e}')
                        return

                    for line in sync_result.stdout.decode(decoding).splitlines():
                        log.info(line.strip())

                    # check the results
                    ls_command_line = [
                        aws_cli_path, "s3", "ls", "--summarize", "--recursive",
                        s3_bucket_path
                    ]
                    ls_command_line_str = " ".join(ls_command_line)
                    log.info(ls_command_line_str)
                    ls_result = subprocess.run(ls_command_line_str,
                                               stdout=subprocess.PIPE,
                                               shell=True,
                                               env=env_var)
                    ls_stdout = "".join([
                        c for c in ls_result.stdout.decode(decoding)
                        if c not in " \r\n"
                    ])  # remove all whitespace
                    if len(ls_stdout) == 0:
                        self.error_out(f'"{ls_command_line_str}" failed ({ls_stdout=}) - check internet connection')
                    else:
                        ls_parsed = ls_re.search(ls_stdout)
                        if ls_parsed is None:
                            self.error_out(f"parse error:\n{ls_command_line_str=}\n{ls_stdout=}")
                        else:
                            count += 1
                            s3_object_count = int(ls_parsed.group(1))
                            s3_total_size = int(ls_parsed.group(2))
                            local_size, local_count = get_dir_size(destination)

                            # rough check that the sync worked
                            if s3_total_size > local_size:
                                # we're missing files
                                message = "not all files backed up"
                                output_routine = self.error_out
                            elif s3_total_size != local_size:
                                # Compare size, not number of files, since aws s3 sync does not copy files of zero size.
                                message = "mismatch"
                                output_routine = self.warning_out
                            else:
                                message = "match"
                                output_routine = log.info
                            output_routine(f"{bucket_name} : {message} (s3_count={s3_object_count}, local_count={local_count}; s3_total_size={s3_total_size}, local_size={local_size})")

        self.info_out(f"{len(buckets)} buckets, {count} backed up, {len(exclusions_no_comments)} excluded")
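
`get_dir_size` above is assumed to return a `(total_size, file_count)` tuple, unpacked as `local_size, local_count`; a minimal sketch of such a helper (hypothetical, not from the source):

from pathlib import Path

def get_dir_size(directory: Path) -> tuple[int, int]:
    # returns (total size in bytes, number of files) for everything under the directory
    sizes = [p.stat().st_size for p in Path(directory).rglob("*") if p.is_file()]
    return sum(sizes), len(sizes)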
Example #13
def test_s3_empty_bucket():
    bucket_name = f"emptybuckettest{platform.node()}{getpass.getuser()}"  # must be globally unique when using real S3
    print(f"{bucket_name=}")
    s3_access = S3Access(bucket_name)
    s3_access.create_bucket()
    assert len(s3_access.dir()) == 0
Example #14
def test_mock():
    s3_access = S3Access(test_awsimple_str)
    assert is_mock() == s3_access.is_mocked()  # make sure that the AWSAccess instance is actually using mocking
Example #15
@pytest.fixture
def s3_access():
    _s3_access = S3Access(profile_name=test_awsimple_str,
                          bucket_name=test_awsimple_str,
                          cache_dir=cache_dir)
    return _s3_access
Example #16
def test_s3_bucket_not_found():
    s3_access = S3Access(profile_name=test_awsimple_str, bucket_name="doesnotexist")
    with pytest.raises(BucketNotFound):
        s3_access.keys()
Example #17
def read_s3_object():
    s3_access = S3Access("testawsimple")
    print(s3_access.read_string("helloworld.txt"))
Example #18
def test_s3_bucket_not_found():
    with pytest.raises(BucketNotFound):
        s3_access = S3Access("IDoNotExist")
        s3_access.dir()
Example #19
from awsimple import S3Access
from os import getlogin

# the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name
s3_key = "hello.txt"

# set up the s3_access object
s3_access = S3Access(f"awsimple-test-bucket-{getlogin()}")  # bucket names are globally unique, so change this bucket name to something unique to you

# let's first make sure the bucket exists
s3_access.create_bucket()

# write our message to S3
s3_access.write_string("hello world", s3_key)

# will output "hello world"
print(s3_access.read_string(s3_key))
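
As a quick follow-up, the freshly written object can be inspected with the same `dir()` metadata calls used in Examples 2 and 6; a sketch:

# list the bucket and show each object's size and SHA-512 hash
for key, metadata in s3_access.dir().items():
    print(key, metadata.size, metadata.sha512)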
Example #20
    def __init__(self, target_app_name: str, target_app_author: str):
        Updater.__init__(self, target_app_name, target_app_author)
        S3Access.__init__(self, create_bucket_name(target_app_name, target_app_author))