def test_s3_python_object():
    my_dict_a = {"a": 1}
    my_dict_b = {"b": 2}
    my_list = [1, 2, 3]
    my_complex_dict = {"1": 2, "my_list": [0, 9], "my_dict": {"z": -1, "w": -2}}
    s3_key = "my_object"
    s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str)
    for my_object in (my_dict_a, my_dict_b, my_list, my_complex_dict):
        s3_access.upload_object_as_json(my_object, s3_key)

        my_dict_from_s3 = s3_access.download_object_as_json(s3_key)
        assert my_object == my_dict_from_s3

        my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key)
        assert my_object == my_dict_from_s3

        my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key)  # this will be the cached version
        assert my_object == my_dict_from_s3
        assert s3_access.download_status.cache_hit

def test_s3_string():
    s3_access = S3Access(test_awsimple_str)
    s3_access.write_string(test_awsimple_str, test_awsimple_str)
    d = s3_access.dir()
    metadata = d[test_awsimple_str]
    assert metadata.size == len(test_awsimple_str)
    assert metadata.key == test_awsimple_str  # the contents are the same as the key
    # https://passwordsgenerator.net/sha512-hash-generator/
    assert metadata.sha512.lower() == "D16764F12E4D13555A88372CFE702EF8AE07F24A3FFCEDE6E1CDC8B7BFC2B18EC3468A7752A09F100C9F24EA2BC77566A08972019FC04CF75AB3A64B475BDFA3".lower()

def test_aws_access():
    # test basic AWS access
    bucket_name = __application_name__
    s3_key = "test.txt"
    s3_value = "hi"
    s3_access = S3Access(bucket_name)
    s3_access.create_bucket()  # when mocking we always have to create the bucket
    s3_access.write_string(s3_value, s3_key)
    assert s3_access.read_string(s3_key) == s3_value

def test_s3_object_does_not_exist():
    i_do_not_exist_key = "i-do-not-exist"
    s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str)  # keyword parameter for bucket_name
    assert s3_access.bucket_exists()  # make sure the bucket exists

    with pytest.raises(s3_access.client.exceptions.NoSuchKey):
        s3_access.read_string(i_do_not_exist_key)

    with pytest.raises(AWSimpleException):
        s3_access.get_s3_object_metadata(i_do_not_exist_key)

def test_s3_upload():
    contents = "I am public readable"
    s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str)
    s3_access.set_public_readable(True)
    test_file_name = "public_readable.txt"
    test_file_path = Path(temp_dir, test_file_name)
    test_file_path.open("w").write(contents)
    assert s3_access.upload(test_file_path, test_file_name, force=True)
    time.sleep(3)
    assert s3_access.object_exists(test_file_name)

    # read from the URL to see if the contents are public readable
    metadata = s3_access.get_s3_object_metadata(test_file_name)
    object_contents = get(metadata.url).content.decode("utf-8")
    assert object_contents == contents

def test_s3_dir():
    s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str)  # use non-keyword parameter for bucket_name

    # set up
    s3_access.create_bucket()  # may already exist
    test_file_name = "test.txt"
    test_file_path = Path(temp_dir, test_file_name)
    test_file_path.open("w").write("hello world")
    s3_access.upload(test_file_path, test_file_name)  # may already be in S3

    s3_dir = s3_access.dir()
    pprint(s3_dir)
    md = s3_dir[test_file_name]
    assert md.key == test_file_name
    assert md.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"  # "hello world"

def test_aws_test():
    # test the test() method (basic AWS connection)

    # these should work
    if not is_mock():
        assert AWSAccess(profile_name=test_awsimple_str).test()
        assert S3Access(test_awsimple_str, profile_name=test_awsimple_str).test()
        assert DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str).test()
        assert SQSAccess(test_awsimple_str, profile_name=test_awsimple_str).test()

    if not is_mock():
        # this (non-existent) profile doesn't have access at all
        with pytest.raises(ProfileNotFound):
            AWSAccess(profile_name="IAmNotAProfile").test()

def test_cache_eviction(s3_access):
    # force cache eviction
    cache_max = 100
    eviction_dir = Path(temp_dir, "eviction")
    eviction_cache = Path(eviction_dir, "cache")
    s3_access_cache_eviction = S3Access(
        profile_name=test_awsimple_str,
        bucket_name=test_awsimple_str,
        cache_dir=eviction_cache,
        cache_max_absolute=cache_max,
    )
    size = 50
    rmtree(eviction_dir, ignore_errors=True)
    while size <= 2 * cache_max:
        file_name = f"t{size}.txt"
        source_file_path = Path(eviction_dir, "source", file_name)
        source_file_path.parent.mkdir(parents=True, exist_ok=True)

        # upload
        with source_file_path.open("w") as f:
            f.truncate(round(size))  # this quickly makes a (sparse) file filled with zeros
        s3_access_cache_eviction.upload(source_file_path, file_name)

        dest_path = Path(eviction_dir, "dest", file_name)

        # cold download
        status_cold = s3_access_cache_eviction.download_cached(file_name, dest_path)
        assert not status_cold.cache_hit
        if size <= cache_max:
            assert status_cold.cache_write

        # warm download
        assert dest_path.exists()
        status_warm = s3_access_cache_eviction.download_cached(file_name, dest_path)
        if size <= cache_max:
            assert status_warm.cache_hit
            assert not status_warm.cache_write
        assert dest_path.exists()

        # make sure cache stays within max size limit
        cache_size = get_directory_size(eviction_cache)
        print(f"{cache_size=}")
        assert cache_size <= cache_max  # make sure we stay within bounds

        size *= 2

def test_s3_keys():
    s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str)  # use non-keyword parameter for bucket_name

    # set up
    s3_access.create_bucket()  # may already exist
    test_file_name = "test.txt"
    test_file_name_2 = "test2.txt"
    test_file_path = Path(temp_dir, test_file_name)
    test_file_path.open("w").write("hello world")
    s3_access.upload(test_file_path, test_file_name_2)  # may already be in S3
    s3_access.upload(test_file_path, test_file_name)  # may already be in S3

    s3_keys = s3_access.keys()
    pprint(s3_keys)

    # for real AWS I may have other objects in the test bucket
    assert test_file_name in s3_keys
    assert test_file_name_2 in s3_keys

def test_s3_bucket():
    s3_access = S3Access(test_bucket_name, profile_name=test_awsimple_str)  # use non-keyword parameter for bucket_name
    s3_access.create_bucket()  # may already exist

    # wait for bucket to exist
    timeout_count = 100
    while not s3_access.bucket_exists() and timeout_count > 0:
        time.sleep(3)
        timeout_count -= 1

    assert s3_access.bucket_exists()
    assert not s3_access.create_bucket()  # already exists
    assert s3_access.delete_bucket()

    # wait for bucket to get deleted
    timeout_count = 100
    while s3_access.bucket_exists() and timeout_count > 0:
        time.sleep(3)
        timeout_count -= 1

    assert not s3_access.bucket_exists()
    assert not s3_access.delete_bucket()  # was nothing to delete

def test_s3_list_buckets():
    bucket_names = S3Access().bucket_list()
    pprint(bucket_names)
    assert test_awsimple_str in bucket_names

def run(self):
    preferences = get_preferences(self.ui_type)
    dry_run = preferences.dry_run
    backup_directory = os.path.join(preferences.backup_directory, "s3")
    os.makedirs(backup_directory, exist_ok=True)
    s3_access = S3Access(profile_name=preferences.aws_profile)
    decoding = "utf-8"

    # we delete all whitespace below
    ls_re = re.compile(r"TotalObjects:([0-9]+)TotalSize:([0-9]+)")

    buckets = s3_access.bucket_list()
    self.info_out(f"found {len(buckets)} buckets")
    count = 0
    exclusions_no_comments = ExclusionPreferences(BackupTypes.S3.name).get_no_comments()
    for bucket_name in buckets:
        # do the sync
        if bucket_name in exclusions_no_comments:
            self.info_out(f"excluding {bucket_name}")
        else:
            if dry_run:
                self.info_out(f"dry run {bucket_name}")
            else:
                self.info_out(f"{bucket_name}")

            # try to find the AWS CLI app
            paths = [
                (Path("venv", "Scripts", "python.exe").absolute(), Path("venv", "Scripts", "aws").absolute()),  # local venv
                (Path("python.exe").absolute(), Path("Scripts", "aws").absolute()),  # installed app
            ]
            aws_cli_path = None
            python_path = None
            for p, a in paths:
                if p.exists() and a.exists():
                    aws_cli_path = a
                    python_path = p
                    break

            if aws_cli_path is None:
                log.error(f"AWS CLI executable not found ({paths=})")
            elif python_path is None:
                log.error(f"Python executable not found ({paths=})")
            else:
                aws_cli_path = f'"{str(aws_cli_path)}"'  # from Path to str, with quotes for installed app

                # The AWS CLI app also needs the python executable to be in the path if it's not in the same dir, which happens when this program is installed.
                # Make the directory of our python.exe the first in the list so it's found and not any of the others that may or may not be in the PATH.
                env_var = deepcopy(os.environ)
                env_var["path"] = f"{str(python_path.parent)};{env_var.get('path', '')}"

                destination = Path(backup_directory, bucket_name)
                os.makedirs(destination, exist_ok=True)
                s3_bucket_path = f"s3://{bucket_name}"

                # Don't use --delete. We want to keep 'old' files locally.
                sync_command_line = [aws_cli_path, "s3", "sync", s3_bucket_path, str(destination.absolute())]
                if dry_run:
                    sync_command_line.append("--dryrun")
                sync_command_line_str = " ".join(sync_command_line)
                log.info(sync_command_line_str)
                try:
                    sync_result = subprocess.run(sync_command_line_str, stdout=subprocess.PIPE, shell=True, env=env_var)
                except FileNotFoundError as e:
                    self.error_out(f'error executing {" ".join(sync_command_line)} {e}')
                    return
                for line in sync_result.stdout.decode(decoding).splitlines():
                    log.info(line.strip())

                # check the results
                ls_command_line = [aws_cli_path, "s3", "ls", "--summarize", "--recursive", s3_bucket_path]
                ls_command_line_str = " ".join(ls_command_line)
                log.info(ls_command_line_str)
                ls_result = subprocess.run(ls_command_line_str, stdout=subprocess.PIPE, shell=True, env=env_var)
                ls_stdout = "".join([c for c in ls_result.stdout.decode(decoding) if c not in " \r\n"])  # remove all whitespace
                if len(ls_stdout) == 0:
                    self.error_out(f'"{ls_command_line_str}" failed ({ls_stdout=}) - check internet connection')
                else:
                    ls_parsed = ls_re.search(ls_stdout)
                    if ls_parsed is None:
                        self.error_out(f"parse error:\n{ls_command_line_str=}\n{ls_stdout=}")
                    else:
                        count += 1
                        s3_object_count = int(ls_parsed.group(1))
                        s3_total_size = int(ls_parsed.group(2))
                        local_size, local_count = get_dir_size(destination)

                        # rough check that the sync worked
                        if s3_total_size > local_size:
                            # we're missing files
                            message = "not all files backed up"
                            output_routine = self.error_out
                        elif s3_total_size != local_size:
                            # Compare size, not number of files, since aws s3 sync does not copy files of zero size.
                            message = "mismatch"
                            output_routine = self.warning_out
                        else:
                            message = "match"
                            output_routine = log.info
                        output_routine(
                            f"{bucket_name} : {message} (s3_count={s3_object_count}, local_count={local_count}; s3_total_size={s3_total_size}, local_size={local_size})"
                        )

    self.info_out(f"{len(buckets)} buckets, {count} backed up, {len(exclusions_no_comments)} excluded")

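# The backup check in run() above relies on parsing the output of "aws s3 ls --summarize --recursive"
# after stripping all whitespace. A small, self-contained sketch of just that parsing step, using a
# hypothetical sample string (the real input comes from the AWS CLI):
import re

ls_re = re.compile(r"TotalObjects:([0-9]+)TotalSize:([0-9]+)")
sample_ls_output = "TotalObjects:42TotalSize:123456"  # assumed example, whitespace already removed
match = ls_re.search(sample_ls_output)
if match is not None:
    s3_object_count = int(match.group(1))  # 42
    s3_total_size = int(match.group(2))  # 123456
    print(s3_object_count, s3_total_size)
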
def test_s3_empty_bucket():
    bucket_name = f"emptybuckettest{platform.node()}{getpass.getuser()}"  # must be globally unique when using real S3
    print(f"{bucket_name=}")
    s3_access = S3Access(bucket_name)
    s3_access.create_bucket()
    assert len(s3_access.dir()) == 0

def test_mock():
    s3_access = S3Access(test_awsimple_str)
    assert is_mock() == s3_access.is_mocked()  # make sure that the AWSAccess instance is actually using mocking

@pytest.fixture()  # decorator assumed: the tests above take s3_access as a pytest fixture argument
def s3_access():
    _s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, cache_dir=cache_dir)
    return _s3_access

def test_s3_bucket_not_found(s3_access):
    s3_access = S3Access(profile_name=test_awsimple_str, bucket_name="doesnotexist")
    with pytest.raises(BucketNotFound):
        s3_access.keys()

def read_s3_object():
    s3_access = S3Access("testawsimple")
    print(s3_access.read_string("helloworld.txt"))

def test_s3_bucket_not_found():
    with pytest.raises(BucketNotFound):
        s3_access = S3Access("IDoNotExist")
        s3_access.dir()

from awsimple import S3Access
from os import getlogin

# the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name
s3_key = "hello.txt"

# set up the s3_access object
s3_access = S3Access(f"awsimple-test-bucket-{getlogin()}")  # bucket names are globally unique, so change this bucket name to something unique to you

# let's first make sure the bucket exists
s3_access.create_bucket()

# write our message to S3
s3_access.write_string("hello world", s3_key)

# will output "hello world"
print(s3_access.read_string(s3_key))

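# A follow-on sketch (not part of the original example) showing file upload and cached download with
# the same s3_access object created above. upload() and download_cached() are the S3Access calls
# exercised by the tests in this file; the local file names here are illustrative only.
from pathlib import Path

upload_path = Path("hello_upload.txt")  # hypothetical local file
upload_path.write_text("hello world")
s3_access.upload(upload_path, "hello_file.txt")  # upload the local file under the S3 key "hello_file.txt"

download_path = Path("hello_download.txt")
status = s3_access.download_cached("hello_file.txt", download_path)  # download, populating the local cache
print(download_path.read_text())  # "hello world"
print(status.cache_hit)  # False on this first (cold) download, as in the cache eviction test above
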
def __init__(self, target_app_name: str, target_app_author: str):
    Updater.__init__(self, target_app_name, target_app_author)
    S3Access.__init__(self, create_bucket_name(target_app_name, target_app_author))