def run(self):
    """
    Back up the DynamoDB tables of the configured AWS profile to local files.

    Reads the backup directory, dry-run flag and AWS profile from the
    preferences for ``self.ui_type`` and the exclusion list for
    ``self.backup_type.name``. Every table that is neither excluded nor
    skipped by a dry run is scanned (through the awsimple cache) and written
    to ``<backup_directory>/dynamodb/<table_name>.pickle`` and
    ``<backup_directory>/dynamodb/<table_name>.json``.

    Progress and the final summary are reported through ``self.info_out``.
    """
    preferences = get_preferences(self.ui_type)
    backup_directory = preferences.backup_directory
    dry_run = preferences.dry_run
    aws_profile = preferences.aws_profile
    exclusions = ExclusionPreferences(self.backup_type.name).get_no_comments()

    tables = DynamoDBAccess(profile_name=aws_profile).get_table_names()
    self.info_out(f"found {len(tables)} DynamoDB tables")

    # awsimple will update immediately if number of table rows changes, but backup from scratch every so often to be safe
    cache_life = timedelta(days=1).total_seconds()
    dir_path = Path(backup_directory, "dynamodb")

    backed_up = 0
    for table_name in tables:
        if table_name in exclusions:
            self.info_out(f"excluding {table_name}")
            continue
        if dry_run:
            self.info_out(f"dry run {table_name}")
            continue

        self.info_out(f"{table_name}")
        # scan with the same profile that was used to list the tables, so the
        # backup reads from the account the table names came from
        table = DynamoDBAccess(table_name, profile_name=aws_profile, cache_life=cache_life)
        table_contents = table.scan_table_cached()

        # only create the directory when something is actually written (not on a dry run)
        dir_path.mkdir(parents=True, exist_ok=True)
        with Path(dir_path, f"{table_name}.pickle").open("wb") as f:
            pickle.dump(table_contents, f)
        with Path(dir_path, f"{table_name}.json").open("w") as f:
            f.write(dynamodb_to_json(table_contents, indent=4))
        backed_up += 1

    # report tables found and tables actually backed up as separate figures
    self.info_out(f"{len(tables)} tables, {backed_up} backed up, {len(exclusions)} excluded")
def test_dynamodb_delete():
    """Put an item, read it back, delete it, and check that a later read raises DBItemNotFound."""
    table = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str)
    table.create_table(id_str)

    key_value = "deleter"
    expected = {id_str: key_value, "color": "blue"}
    table.put_item(expected)

    # the item has to be readable before we delete it
    assert table.get_item(id_str, key_value) == expected

    table.delete_item(id_str, key_value)

    # once deleted, reading the item must raise
    with pytest.raises(DBItemNotFound):
        print(table.get_item(id_str, key_value))
def test_dynamodb_delete_all_items():
    """
    Create a dedicated table, put two items into it and wait until a scan sees both.

    As written here, the function stops after both puts are visible; it does not
    itself call a delete.
    """
    # this test is the only thing we'll use this table for
    table = DynamoDBAccess("awsimple-delete-test", profile_name=test_awsimple_str)
    table.create_table(id_str)

    for row in ({id_str: "me", "answer": 42}, {id_str: "you", "question": 0}):
        table.put_item(dict_to_dynamodb(row))

    # DynamoDB is "eventually consistent", so poll until both rows show up
    while True:
        table_contents = table.scan_table()
        if len(table_contents) == 2:
            break
        print(f"waiting for the put ...{table_contents}")
        time.sleep(1)
def test_dynamodb_sort_as_number():
    """Round-trip an item through a table whose sort key ("year") is created with an int type."""
    table = DynamoDBAccess(
        profile_name=test_awsimple_str,
        table_name=f"{test_awsimple_str}_sort_as_number",
        cache_dir=Path("cache"),
    )
    # sort key as number
    table.create_table("id", "year", sort_key_type=int)

    written = {"id": "me", "year": 1999, "out_of_time": False}
    table.put_item(written)

    raw_item = table.get_item("id", "me", "year", 1999)
    read_back = dynamodb_to_dict(raw_item)
    pprint(raw_item)

    # what we read (after conversion back to a plain dict) must match what we wrote
    assert written == read_back

    table.delete_table()
def test_dynamodb_partition_as_number():
    """Round-trip an item through a table whose partition key ("year") is created with an int type."""
    table = DynamoDBAccess(
        profile_name=test_awsimple_str,
        table_name=f"{test_awsimple_str}_partition_as_number",
        cache_dir=Path("cache"),
    )
    # partition key as number
    table.create_table("year", "id", partition_key_type=int)

    written = {"id": "me", "year": 1999, "out_of_time": False}
    table.put_item(written)

    # NOTE(review): the keys are named here as ("id", "year") although the table was
    # created with "year" as partition and "id" as sort key - presumably get_item
    # does not care about the order; confirm against awsimple.
    fetched = table.get_item("id", "me", "year", 1999)
    pprint(fetched)
    assert written == dynamodb_to_dict(fetched)

    # only use the partition key (no sort key)
    queried = table.query("year", 1999)[0]
    pprint(queried)
    assert written == dynamodb_to_dict(queried)

    table.delete_table()
def test_dynamodb_item_not_found():
    """Reading a key that was never written must raise DBItemNotFound."""
    table = DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str)
    table.create_table(id_str)

    missing_key = "I will never ever exist"
    with pytest.raises(DBItemNotFound):
        table.get_item(id_str, missing_key)
def test_aws_test():
    """Exercise the test() method (basic AWS connection) of the access classes."""

    # these should work
    if not is_mock():
        assert AWSAccess(profile_name=test_awsimple_str).test()

    # each resource-specific access class is built and checked in turn
    for access_class in (S3Access, DynamoDBAccess, SQSAccess):
        assert access_class(test_awsimple_str, profile_name=test_awsimple_str).test()

    if is_mock():
        return

    # this (non-existent) profile doesn't have access at all
    with pytest.raises(ProfileNotFound):
        AWSAccess(profile_name="IAmNotAProfile").test()
def test_get_table_names():
    """get_table_names() must return a non-empty collection that contains the test table."""
    if not is_mock():
        # since we're only going to get the existing table names, we don't have to provide a table name
        lister = DynamoDBAccess(profile_name=test_awsimple_str)
    else:
        # for mock we have to make the table, and have to create it on the fly
        lister = DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str)
        lister.create_table(id_str)

    dynamodb_tables = lister.get_table_names()
    print(dynamodb_tables)

    assert len(dynamodb_tables) > 0
    assert test_awsimple_str in dynamodb_tables
def test_dynamodb_secondary_index_int():
    """Create a table with an int-typed secondary index and query it by partition key only."""
    sort_name = "id2"
    index_name = "num"

    # NOTE(review): no profile_name is passed here, unlike most sibling tests - confirm this is intended
    dynamodb_access = DynamoDBAccess(f"{test_awsimple_str}3")

    # secondary index as an int
    dynamodb_access.create_table(id_str, sort_name, index_name, secondary_key_type=int)

    rows = (
        {id_str: "me", sort_name: "myself", index_name: 1},
        {id_str: "me", sort_name: "moi", index_name: 2},
    )
    for row in rows:
        dynamodb_access.put_item(row)

    query_results = dynamodb_access.query(id_str, "me")
    print(f"{query_results=}")

    # just the partition key should provide us with both rows
    assert len(query_results) == 2

    dynamodb_access.delete_table()
def test_dynamodb_secondary_index():
    """
    Create a table with partition, sort and secondary index keys, then check
    queries by partition key, queries by secondary index, and the keyed/ordered
    result of scan_table_cached_as_dict().
    """
    sort_name = "id2"
    index_name = "id3"

    # NOTE(review): no profile_name is passed here, unlike most sibling tests - confirm this is intended
    dynamodb_access = DynamoDBAccess(f"{test_awsimple_str}2")
    dynamodb_access.create_table(id_str, sort_name, index_name)

    first_row = {id_str: "me", sort_name: "myself", index_name: "i"}
    dynamodb_access.put_item(first_row)

    second_row = deepcopy(first_row)
    # also test unicode!
    second_row.update({sort_name: "moi même", index_name: "je"})
    dynamodb_access.put_item(second_row)

    query_results = dynamodb_access.query(id_str, "me")
    print(f"{query_results=}")

    # just the partition key should provide us with both rows
    assert len(query_results) == 2

    # with (only) the secondary index (in DynamoDB you can't mix primary and secondary indexes)
    assert dynamodb_access.query(index_name, "je") == [second_row]

    french_key = DictKey(partition="me", sort="moi même")
    english_key = DictKey(partition="me", sort="myself")
    expected_contents = {
        french_key: {"id": "me", "id2": "moi même", "id3": "je"},
        english_key: {"id": "me", "id2": "myself", "id3": "i"},
    }

    contents = dynamodb_access.scan_table_cached_as_dict()
    assert contents == expected_contents

    # the key order of the returned mapping is checked as well
    assert list(contents.keys()) == [french_key, english_key]

    dynamodb_access.delete_table()
def musical_instruments_example():
    """
    Example: keep an inventory of musical instruments in a DynamoDB table.

    Creates the table, stores three instruments, then times a query for one
    manufacturer and a cached scan of the whole table, printing both results.
    """

    # short cache life for development
    dynamodb_access = DynamoDBAccess("musical_instruments_example", profile_name="testawsimple", cache_life=60)

    # The primary key is a composite: partition key (manufacturer) plus sort key (serial_number).
    # Within one manufacturer a serial number identifies exactly one instrument (this example
    # assumes a serial number fits in an integer and doesn't have to be a string).
    dynamodb_access.create_table("manufacturer", "serial_number", sort_key_type=int)

    instruments = (
        {"manufacturer": "Gibson", "serial_number": 1234, "model": "Ripper", "year": 1983, "price": 1299.50},
        {"manufacturer": "Gibson", "serial_number": 5678, "model": "Thunderbird", "year": 1977, "price": 2399.50},
        # same serial number as the Gibson Ripper, but that's OK since this is Fender
        {"manufacturer": "Fender", "serial_number": 1234, "model": "Precision", "year": 2008, "price": 1800.0},
    )
    for instrument in instruments:
        # we have to convert float to a Decimal for DynamoDB
        dynamodb_access.put_item(dict_to_dynamodb(instrument))

    # get all the Gibson instruments - this can (and will in this case) be multiple items
    start = time.time()
    gibson_items = dynamodb_access.query("manufacturer", "Gibson")
    end = time.time()
    pprint(gibson_items)
    print(f"query took {end-start} seconds")  # nominal 0.1 to 0.15 seconds
    print()

    # get the entire inventory
    # use cached if the table is large and *only* if we know our table is slowly or never changing
    start = time.time()
    inventory = dynamodb_access.scan_table_cached()
    end = time.time()
    pprint(inventory)
    # always fast for this small data set, but caching can offer a speedup for large tables
    print(f"scan took {end-start} seconds ({dynamodb_access.cache_hit=})")
def test_dynamodb_query():
    """Check query(), query_begins_with() and query_one() on a table with partition and sort keys."""
    # ps = both partition and sort
    table = DynamoDBAccess(profile_name=test_awsimple_str, table_name="testawsimpleps")
    table.create_table("id", "name")

    rows = (
        # three entries for "me"
        {"id": "me", "name": "james", "answer": 13},  # this will be the "first" one
        {"id": "me", "name": "james abel", "answer": 1},
        {"id": "me", "name": "zzz", "answer": 99},  # this will be the "last" one
        {"id": "notme", "name": "notjames", "answer": 42},
    )
    for row in rows:
        table.put_item(row)

    # partition only
    assert len(table.query("id", "me")) == 3

    # partition and sort
    assert len(table.query("id", "me", "name", "james")) == 1

    # begins with
    assert len(table.query_begins_with("id", "me", "name", "james a")) == 1
    assert len(table.query_begins_with("id", "me", "name", "jame")) == 2

    # does not exist
    assert len(table.query("id", "idonotexist")) == 0

    # the "last" entry, as sorted by sort key
    highest = table.query_one("id", "me", QuerySelection.highest)
    assert highest["answer"] == 99
    assert highest["name"] == "zzz"

    # the "first" entry, as sorted by sort key
    lowest = table.query_one("id", "me", QuerySelection.lowest)
    assert lowest["answer"] == 13
    assert lowest["name"] == "james"

    # nothing stored under this partition value
    assert table.query_one("id", "idonotexist", QuerySelection.lowest) is None
def test_dynamodb_scan_table_as_dict():
    """Check the dict-returning scans: uncached, cached (first call and cache hit), and with a sort_key."""
    table = DynamoDBAccess(
        profile_name=test_awsimple_str,
        table_name=test_awsimple_str,
        cache_dir=Path("cache"),
        cache_life=timedelta(seconds=10).total_seconds(),
    )
    table.create_table(id_str)

    # will be sorted in a different order than we're inputting
    for row in ({id_str: "b", "value": 1}, {id_str: "c", "value": 3}, {id_str: "a", "value": 2}):
        table.put_item(row)

    expected_contents = {
        "a": {"id": "a", "value": Decimal("2")},
        "b": {"id": "b", "value": Decimal("1")},
        "c": {"id": "c", "value": Decimal("3")},
    }

    # plain (uncached) scan
    check_scan_table(table.scan_table_as_dict(), expected_contents)

    # first cached scan
    check_scan_table(table.scan_table_cached_as_dict(), expected_contents)

    # second cached scan has to be served from the cache
    cached_contents = table.scan_table_cached_as_dict()
    assert table.cache_hit
    check_scan_table(cached_contents, expected_contents)

    # test sort_key
    def by_id(row):
        return row[id_str]

    check_scan_table(table.scan_table_cached_as_dict(sort_key=by_id), expected_contents)
def users_example():
    """
    Example: keep a table of users in DynamoDB.

    Also shows the flexibility of NoSQL: new fields can simply be added to later
    items without any migration.
    """

    dynamodb_access = DynamoDBAccess("users_example", profile_name="testawsimple")

    # we're only using email as a partition key in our primary key (no sort key). emails are unique to each user.
    dynamodb_access.create_table("email")

    # add our first user using email, first and last name. Initially, we may think that's all we need.
    first_user = {"email": "*****@*****.**", "first_name": "Victor", "last_name": "Wooten"}
    dynamodb_access.put_item(first_user)

    # oh no. No one knows who "John Jones" is, they only know "John Paul Jones", so we need to add a middle name.
    # Luckily we are using a NoSQL database, so we just add "middle_name" in a new key/value pair. No database migration needed.
    second_user = {"email": "*****@*****.**", "first_name": "John", "middle_name": "Paul", "last_name": "Jones"}
    dynamodb_access.put_item(second_user)

    # oh no again. No one knows who "Gordon Matthew Thomas Sumner" is either, even with 2 middle names! All they know is "Sting".
    # We need to add a nickname. No problem since we're using a NoSQL database.
    third_user = {
        "email": "*****@*****.**",
        "first_name": "Gordon",
        "middle_name": "Matthew",
        "middle_name_2": "Thomas",
        "last_name": "Sumner",
        "nickname": "Sting",
    }
    dynamodb_access.put_item(third_user)

    # look up user info for one of our users
    # this is a "get" since we're using a key and will always get back exactly one item
    start = time.time()
    user = dynamodb_access.get_item("email", "*****@*****.**")
    end = time.time()

    pprint(user)
    # should take just a fraction of a second. 0.05 seconds was a nominal value on our test system.
    print(f"took {end-start} seconds")
def test_dynamodb_upsert():
    """upsert_item() must insert a missing item, add new attributes and overwrite existing ones."""
    table = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str)
    table.create_table(id_str)

    key_value = "upserter"
    table.delete_item(id_str, key_value)  # make sure the item doesn't exist

    expected = {id_str: key_value}
    changes = (
        {"color": "blue"},  # insert
        {"my_size": 9},  # update with new data
        {"my_size": 10},  # update existing data
    )
    for change in changes:
        expected.update(change)
        table.upsert_item(id_str, key_value, item=change)
        # after every upsert the stored item has to equal everything applied so far
        assert table.get_item(id_str, key_value) == expected
def test_dynamodb_create_table():
    """Create a temporary table, write and scan one item, then delete the table."""
    table = DynamoDBAccess(f"{test_awsimple_str}temp", profile_name=test_awsimple_str)

    table.create_table("id")
    # create_table has a waiter so the table should exist at this point
    assert table.table_exists()

    table.put_item({"id": "me", "value": 1})

    table_data = table.scan_table_cached()
    pprint(table_data)
    first_row = table_data[0]
    assert first_row["id"] == "me"
    assert first_row["value"] == 1
    assert len(table_data) == 1

    # a forced re-scan must see the same single row
    rescanned = table.scan_table_cached(invalidate_cache=True)
    assert len(rescanned) == 1

    table.delete_table()
    # delete_table has a waiter so the table should no longer exist at this point;
    # a second delete is therefore expected to return a falsy result
    assert not table.delete_table()
def test_dynamodb():
    """
    Check dict_to_dynamodb() conversions on the sample input, then round-trip the
    converted dict through a table and exercise the cached and uncached scans.
    """
    dynamodb_dict = dict_to_dynamodb(sample_input)

    # scalar and container conversions
    assert dynamodb_dict["sample1"] == "Test Data"
    assert math.isclose(float(dynamodb_dict["sample2"]), decimal.Decimal(2.0))
    assert dynamodb_dict["sample3"] is True
    assert dynamodb_dict["sample5"] is None
    assert dynamodb_dict["sample6"] == {"test": True}
    assert dynamodb_dict["sample7"] == ["Hello", "World"]
    assert dynamodb_dict["sample8"] == [decimal.Decimal(9), decimal.Decimal(10)]
    assert dynamodb_dict["DecimalInt"] == decimal.Decimal(42)
    assert dynamodb_dict["DecimalFloat"] == decimal.Decimal(2.0) / decimal.Decimal(3.0)
    assert dynamodb_dict["a_tuple"] == [1, 2, 3]
    # test conversion of an int key to a string
    assert dynamodb_dict["42"] == "my_key_is_an_int"
    assert dynamodb_dict["test_date_time"] == "2019-06-04T20:18:55+00:00"
    assert dynamodb_dict["zero_len_string"] is None

    # while dictim is case insensitive, when we convert to dict for DynamoDB it becomes case sensitive
    converted_dictim = dynamodb_dict["dictim"]
    assert list(converted_dictim["HI"])[0] == "there"
    assert converted_dictim["HI"]["there"] == 1  # actually Decimal(1)
    assert converted_dictim.get("hi") is None  # we're back to case sensitivity

    table = DynamoDBAccess(
        profile_name=test_awsimple_str,
        table_name=test_awsimple_str,
        cache_dir=Path("cache"),
        cache_life=timedelta(seconds=1).total_seconds(),
    )
    table.create_table(id_str)
    table.put_item(dynamodb_dict)

    # make sure we get back exactly what we wrote
    sample_from_db = table.get_item(id_str, dict_id)
    assert sample_from_db == dynamodb_dict

    # first cached scan: must not be a cache hit
    first_cached = table.scan_table_cached()
    assert not table.cache_hit
    check_table_contents(first_cached)

    # plain scan
    check_table_contents(table.scan_table())

    # second cached scan: must be a cache hit
    second_cached = table.scan_table_cached()
    assert table.cache_hit
    check_table_contents(second_cached)

    # no sort key
    assert table.get_primary_keys() == (id_str, None)