def test_scan_sort_order_string(filled_test_table):
    """Check that a scan returns the items of partition "long" sorted by "c".

    The fixture yields the table together with the list of items written to
    it. The sort key values are compared in string order.
    """
    test_table, items = filled_test_table
    scanned = full_scan(test_table)
    assert len(items) == len(scanned)

    def sort_keys_of_long(rows):
        # Keep just the sort key ("c") of the rows in the partition "long".
        return [row['c'] for row in rows if row['p'] == 'long']

    expected_keys = sort_keys_of_long(items)
    scanned_keys = sort_keys_of_long(scanned)
    # The scan must already deliver the keys sorted (in string order)...
    assert sorted(scanned_keys) == scanned_keys
    # ...and they must be a sorted version of the keys we expected.
    assert sorted(expected_keys) == scanned_keys
def test_scan_filter_expression(filled_test_table):
    """Scan with a FilterExpression built from ``Attr`` condition objects.

    Two cases are covered: a single equality condition, and two equality
    conditions combined with ``&``. In both cases the scan result is
    compared, as a multiset, with the same filter applied in Python to the
    items the fixture wrote.
    """
    test_table, items = filled_test_table

    # Case 1: one equality condition.
    got_items = full_scan(test_table,
        FilterExpression=Attr("attribute").eq("xxxx"))
    print(got_items)
    expected_items = [
        item for item in items
        if item.get('attribute') == 'xxxx'
    ]
    assert multiset(expected_items) == multiset(got_items)

    # Case 2: two equality conditions, both of which must hold.
    got_items = full_scan(test_table,
        FilterExpression=Attr("attribute").eq("xxxx") & Attr("another").eq("yy"))
    print(got_items)
    expected_items = [
        item for item in items
        if item.get('attribute') == 'xxxx' and item.get('another') == 'yy'
    ]
    assert multiset(expected_items) == multiset(got_items)
def test_gsi_missing_attribute_3(test_table_gsi_3):
    """Items lacking "a", "b", or both must not appear in the index "hello".

    The same partition key is rewritten three times, each time with a
    different attribute missing. After every write the item must be readable
    from the base table but absent from a scan of the index.
    """
    p = random_string()
    a = random_string()
    b = random_string()
    variants = [
        {'p': p, 'b': b},   # missing "a"
        {'p': p, 'a': a},   # missing "b"
        {'p': p},           # missing both
    ]
    for item in variants:
        test_table_gsi_3.put_item(Item=item)
        # The item appears in the base table...
        assert test_table_gsi_3.get_item(Key={'p': p})['Item'] == item
        # ...but not in the index.
        # Note: with eventually consistent read, we can't really be sure
        # that an item will "never" appear in the index. We hope that if a
        # bug exists and such an item did appear, sometimes the delay here
        # will be enough for the unexpected item to become visible.
        keys_in_index = [i['p'] for i in full_scan(test_table_gsi_3, IndexName='hello')]
        assert p not in keys_in_index
def test_scan_parallel(filled_test_table):
    """Check that a segmented scan returns every item exactly once.

    For several values of TotalSegments, each segment is scanned in turn and
    the union of the results is compared with the items the fixture wrote.
    """
    test_table, items = filled_test_table
    for nsegments in [1, 2, 17]:
        print(f'Testing TotalSegments={nsegments}')
        got_items = [
            item
            for segment in range(nsegments)
            for item in full_scan(test_table, TotalSegments=nsegments, Segment=segment)
        ]
        # Comparing multisets verifies that each expected item was returned
        # in one - and just one - of the segments.
        assert multiset(items) == multiset(got_items)
def test_scan_attributes_to_get(dynamodb, filled_test_table):
    """Check that Scan with AttributesToGet returns only the listed attributes.

    An item that lacks a requested attribute is expected to come back
    without it rather than being dropped from the result.
    """
    table, items = filled_test_table

    def project(item, names):
        # The subset of `item` restricted to the attributes it actually has.
        return {name: item[name] for name in names if name in item}

    attribute_lists = [
        ['another'],       # only non-key attributes (one item doesn't have it!)
        ['c', 'another'],  # a key attribute (sort key) and non-key
        ['p', 'c'],        # entire key
        ['nonexistent'],   # none of the items have this attribute!
    ]
    for wanted in attribute_lists:
        print(wanted)
        got_items = full_scan(table, AttributesToGet=wanted)
        expected_items = [project(item, wanted) for item in items]
        assert multiset(expected_items) == multiset(got_items)
def test_tracing_all(with_tracing, test_table_s_isolation_always, dynamodb):
    """Issue one request of each type and check each left the expected traces.

    Every request carries its own long random string, which is later used to
    locate that request's trace. The `with_tracing` fixture is not used
    directly here (presumably it turns tracing on - confirm in its
    definition).
    """
    table = test_table_s_isolation_always
    client = table.meta.client
    # (random marker, expected tracing events), in the order of the requests.
    checks = []

    # PutItem:
    p_putitem = random_string(20)
    table.put_item(Item={'p': p_putitem})
    checks.append((p_putitem, ['PutItem', 'CAS successful']))
    # GetItem:
    p_getitem = random_string(20)
    table.get_item(Key={'p': p_getitem})
    checks.append((p_getitem, ['GetItem', 'Querying is done']))
    # DeleteItem:
    p_deleteitem = random_string(20)
    table.delete_item(Key={'p': p_deleteitem})
    checks.append((p_deleteitem, ['DeleteItem', 'CAS successful']))
    # UpdateItem:
    p_updateitem = random_string(20)
    table.update_item(Key={'p': p_updateitem}, AttributeUpdates={})
    checks.append((p_updateitem, ['UpdateItem', 'CAS successful']))
    # BatchGetItem:
    p_batchgetitem = random_string(20)
    client.batch_get_item(
        RequestItems={table.name: {'Keys': [{'p': p_batchgetitem}]}})
    checks.append((p_batchgetitem, ['BatchGetItem', 'Querying is done']))
    # BatchWriteItem:
    p_batchwriteitem = random_string(20)
    client.batch_write_item(
        RequestItems={table.name: [{'PutRequest': {'Item': {'p': p_batchwriteitem}}}]})
    checks.append((p_batchwriteitem, ['BatchWriteItem', 'CAS successful']))
    # Query:
    p_query = random_string(20)
    full_query(table, KeyConditionExpression='p = :p',
        ExpressionAttributeValues={':p': p_query})
    checks.append((p_query, ['Query', 'Querying is done']))
    # Scan:
    p_scan = random_string(20)
    full_scan(table, FilterExpression='p = :p',
        ExpressionAttributeValues={':p': p_scan})
    checks.append((p_scan, ['Scan', 'Performing a database query']))

    # Check the traces only after all requests were sent. NOTE: the expected
    # messages are fairly arbitrary, and may break in the future if we
    # change the tracing messages...
    for marker, expected_events in checks:
        expect_tracing_events(dynamodb, marker, expected_events)
def test_gsi_missing_attribute(test_table_gsi_2):
    """An item without the attribute "x" must not appear in the index "hello".

    Two items are written: one with "x" and one without. Both must be
    readable from the base table, but only the first may be visible through
    the index.
    """
    p1 = random_string()
    x1 = random_string()
    indexed_item = {'p': p1, 'x': x1}
    test_table_gsi_2.put_item(Item=indexed_item)
    p2 = random_string()
    unindexed_item = {'p': p2}
    test_table_gsi_2.put_item(Item=unindexed_item)

    # Both items are now in the base table:
    assert test_table_gsi_2.get_item(Key={'p': p1})['Item'] == indexed_item
    assert test_table_gsi_2.get_item(Key={'p': p2})['Item'] == unindexed_item

    # The first item is in the index, so a Query on the index finds it, and
    # a scan of the base table finds it as well.
    x_equals_x1 = {'x': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}}
    assert_index_query(test_table_gsi_2, 'hello', [indexed_item],
        KeyConditions=x_equals_x1)
    base_keys = [i['p'] for i in full_scan(test_table_gsi_2)]
    assert p1 in base_keys

    # The second item must not be found by a scan of the index.
    # Note: with eventually consistent read, we can't really be sure that
    # an item will "never" appear in the index. We do this check last, so if
    # we had a bug and such an item did appear, hopefully we had enough time
    # for the bug to become visible. At least sometimes.
    index_keys = [i['p'] for i in full_scan(test_table_gsi_2, IndexName='hello')]
    assert p2 not in index_keys
def test_gsi_identical(dynamodb):
    """Check a GSI whose key schema is identical to the base table's.

    A dedicated table is created with hash key "p" and an index "hello" that
    also uses "p" as its hash key and projects all attributes. Scanning the
    base table and scanning the index must return the same items, and
    scanning a non-existent index must fail with a ValidationException.

    The table is deleted in a ``finally`` clause so that a failing assertion
    does not leave it behind.
    """
    table = create_test_table(dynamodb,
        KeySchema=[{'AttributeName': 'p', 'KeyType': 'HASH'}],
        AttributeDefinitions=[{'AttributeName': 'p', 'AttributeType': 'S'}],
        GlobalSecondaryIndexes=[
            {
                'IndexName': 'hello',
                'KeySchema': [{'AttributeName': 'p', 'KeyType': 'HASH'}],
                'Projection': {'ProjectionType': 'ALL'},
            }
        ])
    try:
        items = [{'p': random_string(), 'x': random_string()} for _ in range(10)]
        with table.batch_writer() as batch:
            for item in items:
                batch.put_item(item)
        # Scanning the entire table directly or via the index yields the
        # same results (in different order).
        assert multiset(items) == multiset(full_scan(table))
        assert_index_scan(table, 'hello', items)
        # We can't scan a non-existent index
        with pytest.raises(ClientError, match='ValidationException'):
            full_scan(table, IndexName='wrong')
    finally:
        # Always clean up, even when one of the checks above failed.
        table.delete()
def test_scan_projection_expression_path(test_table):
    """Check a Scan whose ProjectionExpression contains a nested path (a.x).

    Because this is a scan rather than a query, it also returns unrelated
    partitions created by other tests (hopefully not too many...). Those are
    filtered out by partition key, which is why "p" is requested in the
    projection too.
    """
    p = random_string()
    items = []
    for i in range(10):
        items.append({
            'p': p,
            'c': str(i),
            'a': {'x': str(i*10), 'y': 'hi'},
            'b': 'hello',
        })
    with test_table.batch_writer() as batch:
        for item in items:
            batch.put_item(item)

    scanned = full_scan(test_table, ProjectionExpression="p, a.x")
    # Ignore anything that does not belong to the partition written above.
    got_items = [row for row in scanned if row['p'] == p]
    # Of the nested attribute "a", only the member "x" should be returned.
    expected_items = [{'p': p, 'a': {'x': item['a']['x']}} for item in items]
    assert multiset(expected_items) == multiset(got_items)
def test_projection_expression_and_attributes_to_get(test_table_s):
    """ProjectionExpression and AttributesToGet may not be used together.

    GetItem, Scan and Query are each expected to fail with a
    ValidationException whose message mentions "both".
    """
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
    rejected = 'ValidationException.*both'

    # GetItem:
    with pytest.raises(ClientError, match=rejected):
        test_table_s.get_item(
            Key={'p': p},
            ConsistentRead=True,
            ProjectionExpression='a',
            AttributesToGet=['b'])['Item']
    # Scan:
    with pytest.raises(ClientError, match=rejected):
        full_scan(test_table_s, ProjectionExpression='a', AttributesToGet=['a'])
    # Query:
    p_equals = {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}
    with pytest.raises(ClientError, match=rejected):
        full_query(test_table_s,
            KeyConditions=p_equals,
            ProjectionExpression='a',
            AttributesToGet=['a'])
def find_tracing_session(dynamodb, str):
    """Return the session_id of the tracing session whose parameters contain `str`.

    The sessions table is re-scanned every 0.3 seconds for up to 30 seconds.
    If no matching session shows up in that time, the test is failed through
    pytest.fail().

    NOTE: the parameter name `str` shadows the builtin inside this function;
    it is kept because it is part of the function's interface.
    """
    # The tracing session table does not get updated immediately - we may need
    # to sleep a bit until the requested string appears. We save the previous
    # session table in last_scan, so if we're looking for sessions of number of
    # different requests started together, it might be enough to read from disk
    # the session table just once, and not re-read (and of course, not sleep)
    # when looking for the other requests.
    global last_scan
    trace_sessions_table = dynamodb.Table('.scylla.alternator.system_traces.sessions')
    start = time.time()
    if last_scan is None:
        # The trace tables have RF=2, even on a one-node test setup, and
        # thus fail reads with ConsistentRead=True (Quorum)...
        last_scan = full_scan(trace_sessions_table, ConsistentRead=False)
    while time.time() - start < 30:
        # Look in the cached scan first; only sleep and re-scan on a miss.
        for entry in last_scan:
            if str in entry['parameters']:
                print(f'find_tracing_session time {time.time()-start}')
                return entry['session_id']
        time.sleep(0.3)
        last_scan = full_scan(trace_sessions_table, ConsistentRead=False)
    pytest.fail("Couldn't find tracing session")
def test_gsi_simple(test_table_gsi_1):
    """Basic check of querying and scanning through the index "hello".

    Querying by "c" alone is rejected on the base table but works through
    the index, and a scan of the index returns the same items as a scan of
    the base table.
    """
    items = []
    for _ in range(10):
        items.append({'p': random_string(), 'c': random_string(), 'x': random_string()})
    with test_table_gsi_1.batch_writer() as batch:
        for item in items:
            batch.put_item(item)

    c = items[0]['c']
    c_equals = {'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}}
    # The index allows a query on just a specific sort key, which isn't
    # allowed on the base table.
    with pytest.raises(ClientError, match='ValidationException'):
        full_query(test_table_gsi_1, KeyConditions=c_equals)
    expected_items = [item for item in items if item['c'] == c]
    assert_index_query(test_table_gsi_1, 'hello', expected_items,
        KeyConditions=c_equals)
    # Scanning the entire table directly or via the index yields the same
    # results (in different order).
    base_contents = full_scan(test_table_gsi_1)
    assert_index_scan(test_table_gsi_1, 'hello', base_contents)
def test_gsi_key_not_in_index(test_table_gsi_1_hash_only):
    """Query the index "hello" by "c", with distinct and with shared "c" values.

    Finally, a scan through the index must return the same items as a scan
    of the base table.
    """
    table = test_table_gsi_1_hash_only

    def write_all(rows):
        # Store all the given rows using a single batch writer.
        with table.batch_writer() as batch:
            for row in rows:
                batch.put_item(row)

    def c_equals(value):
        # KeyConditions selecting the items whose "c" equals `value`.
        return {'c': {'AttributeValueList': [value], 'ComparisonOperator': 'EQ'}}

    # Test with items with different 'c' values:
    items = [{'p': random_string(), 'c': random_string(), 'x': random_string()}
             for _ in range(10)]
    write_all(items)
    c = items[0]['c']
    expected_items = [item for item in items if item['c'] == c]
    assert_index_query(table, 'hello', expected_items, KeyConditions=c_equals(c))

    # Test items with the same sort key 'c' but different hash key 'p'
    c = random_string()
    items = [{'p': random_string(), 'c': c, 'x': random_string()}
             for _ in range(10)]
    write_all(items)
    assert_index_query(table, 'hello', items, KeyConditions=c_equals(c))

    # Scanning the entire table directly or via the index yields the same
    # results (in different order).
    assert_index_scan(table, 'hello', full_scan(table))
def test_slow_query_log(with_slow_query_logging, test_table_s, dynamodb):
    """Check that a PutItem and a DeleteItem both appear in the slow-query log.

    The log table is polled every 0.5 seconds for up to 60 seconds; the test
    fails if the two entries do not both show up in that time.
    """
    table = test_table_s
    p = random_string(20)
    print(f"Traced key: {p}")
    table.put_item(Item={'p': p})
    table.delete_item(Key={'p': p})

    def was_logged(operation, results):
        # True if some entry mentions both the operation name and our key
        # in its parameters, and was logged for the user "alternator".
        return any(operation in result['parameters']
                   and p in result['parameters']
                   and result['username'] == "alternator"
                   for result in results)

    # Verify that the operations got logged. Each operation taking more than
    # 0 microseconds is logged, which effectively logs all requests as slow.
    slow_query_table = dynamodb.Table('.scylla.alternator.system_traces.node_slow_log')
    deadline = time.time() + 60
    while time.time() < deadline:
        results = full_scan(slow_query_table, ConsistentRead=False)
        put_item_found = was_logged("PutItem", results)
        delete_item_found = was_logged("DeleteItem", results)
        if put_item_found and delete_item_found:
            return
        time.sleep(0.5)
    pytest.fail("Slow query entries not found")
def assert_index_scan(table, index_name, expected_items, **kwargs):
    """Assert that scanning `table` through `index_name` yields `expected_items`.

    The comparison is done on multisets, so the order of the items does not
    matter. Any extra keyword arguments are passed on to full_scan().
    """
    expected = multiset(expected_items)
    scanned = multiset(full_scan(table, IndexName=index_name, **kwargs))
    assert expected == scanned
def test_gsi_strong_consistency(test_table_gsi_1):
    """ConsistentRead=True is rejected when reading through the index "hello".

    Both Query and Scan are expected to fail with a ValidationException
    whose message mentions "Consistent".
    """
    rejected = 'ValidationException.*Consistent'
    c_equals_hi = {'c': {'AttributeValueList': ['hi'], 'ComparisonOperator': 'EQ'}}
    # Query:
    with pytest.raises(ClientError, match=rejected):
        full_query(test_table_gsi_1,
            KeyConditions=c_equals_hi,
            IndexName='hello',
            ConsistentRead=True)
    # Scan:
    with pytest.raises(ClientError, match=rejected):
        full_scan(test_table_gsi_1, IndexName='hello', ConsistentRead=True)
def test_gsi_missing_index(test_table_gsi_1):
    """Reading through an index name that does not exist must be rejected.

    Both Query and Scan are expected to fail with a ValidationException
    whose message includes the wrong index name.
    """
    rejected = 'ValidationException.*wrong_name'
    x_equals_1 = {'x': {'AttributeValueList': [1], 'ComparisonOperator': 'EQ'}}
    # Query:
    with pytest.raises(ClientError, match=rejected):
        full_query(test_table_gsi_1, IndexName='wrong_name',
            KeyConditions=x_equals_1)
    # Scan:
    with pytest.raises(ClientError, match=rejected):
        full_scan(test_table_gsi_1, IndexName='wrong_name')
def test_filter_expression_scan_partition_key(filled_test_table):
    """Check a Scan whose FilterExpression tests the key attribute "p".

    The result must contain exactly the items whose "p" equals '3'.
    """
    table, items = filled_test_table
    wanted_p = '3'
    got_items = full_scan(table,
        FilterExpression='p=:a',
        ExpressionAttributeValues={':a': wanted_p})
    expected_items = [item for item in items if item['p'] == wanted_p]
    assert multiset(expected_items) == multiset(got_items)
def test_scan_parallel_with_exclusive_start_key(filled_test_table):
    """Check that this segmented scan with an ExclusiveStartKey is rejected.

    The scan asks for segment 0 of 1000000 and supplies an ExclusiveStartKey.
    It is expected to fail with a ValidationException whose message
    mentions "Exclusive".
    """
    # Only the table is needed here; the fixture's item list is not used.
    test_table, _ = filled_test_table
    start_key = {'p': '0', 'c': '0'}
    with pytest.raises(ClientError, match='ValidationException.*Exclusive'):
        full_scan(test_table,
            TotalSegments=1000000,
            Segment=0,
            ExclusiveStartKey=start_key)
def test_scan_select(filled_test_table):
    """Check the behavior of Scan's Select parameter for each of its values.

    Covers the default, ALL_ATTRIBUTES, ALL_PROJECTED_ATTRIBUTES,
    SPECIFIC_ATTRIBUTES (alone and together with AttributesToGet or
    ProjectionExpression), COUNT, and an unknown value.
    """
    test_table, items = filled_test_table
    # By default, a scan returns all the items, with all their attributes:
    got_items = full_scan(test_table)
    assert multiset(items) == multiset(got_items)
    # Select=ALL_ATTRIBUTES does exactly the same as the default - return
    # all attributes:
    got_items = full_scan(test_table, Select='ALL_ATTRIBUTES')
    assert multiset(items) == multiset(got_items)
    # Select=ALL_PROJECTED_ATTRIBUTES is not allowed on a base table (it
    # is just for indexes, when IndexName is specified)
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(test_table, Select='ALL_PROJECTED_ATTRIBUTES')
    # Select=SPECIFIC_ATTRIBUTES requires that either an AttributesToGet
    # or a ProjectionExpression appears, but then really does nothing beyond
    # what AttributesToGet and ProjectionExpression already do:
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(test_table, Select='SPECIFIC_ATTRIBUTES')
    wanted = ['c', 'another']
    got_items = full_scan(test_table, Select='SPECIFIC_ATTRIBUTES',
        AttributesToGet=wanted)
    expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
    assert multiset(expected_items) == multiset(got_items)
    got_items = full_scan(test_table, Select='SPECIFIC_ATTRIBUTES',
        ProjectionExpression=','.join(wanted))
    assert multiset(expected_items) == multiset(got_items)
    # Select=COUNT just returns a count - not any items
    (got_count, got_items) = full_scan_and_count(test_table, Select='COUNT')
    assert got_count == len(items)
    assert got_items == []
    # Check that we also get a count in regular scans - not just with
    # Select=COUNT, but without Select=COUNT we get both items and count:
    (got_count, got_items) = full_scan_and_count(test_table)
    assert got_count == len(items)
    assert multiset(items) == multiset(got_items)
    # Select with some unknown string generates a validation exception:
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(test_table, Select='UNKNOWN')
def test_scan_with_key_equality_filtering(dynamodb, filled_test_table):
    """Check Scan with a ScanFilter holding EQ conditions on key attributes.

    Four filters are tried: on the hash key "p" alone, on the sort key "c"
    alone, on "p" together with the attribute "attribute", and on "c"
    together with the attribute "another". In every case the expected items
    are computed in Python with exactly the same conditions that were sent
    in the ScanFilter.
    """
    table, items = filled_test_table
    scan_filter_p = {
        "p": {"AttributeValueList": ["7"], "ComparisonOperator": "EQ"}
    }
    scan_filter_c = {
        "c": {"AttributeValueList": ["9"], "ComparisonOperator": "EQ"}
    }
    scan_filter_p_and_attribute = {
        "p": {"AttributeValueList": ["7"], "ComparisonOperator": "EQ"},
        "attribute": {"AttributeValueList": ["x" * 7], "ComparisonOperator": "EQ"}
    }
    scan_filter_c_and_another = {
        "c": {"AttributeValueList": ["9"], "ComparisonOperator": "EQ"},
        "another": {"AttributeValueList": ["y" * 16], "ComparisonOperator": "EQ"}
    }

    # Filtering on the hash key
    got_items = full_scan(table, ScanFilter=scan_filter_p)
    expected_items = [item for item in items
                      if "p" in item and item["p"] == "7"]
    assert multiset(expected_items) == multiset(got_items)

    # Filtering on the sort key
    got_items = full_scan(table, ScanFilter=scan_filter_c)
    expected_items = [item for item in items
                      if "c" in item and item["c"] == "9"]
    assert multiset(expected_items) == multiset(got_items)

    # Filtering on the hash key and an attribute. The expected items must be
    # selected by "attribute", the attribute that scan_filter_p_and_attribute
    # really filters on (and not by "another").
    got_items = full_scan(table, ScanFilter=scan_filter_p_and_attribute)
    expected_items = [item for item in items
                      if "p" in item and "attribute" in item
                      and item["p"] == "7" and item["attribute"] == "x" * 7]
    assert multiset(expected_items) == multiset(got_items)

    # Filtering on the sort key and an attribute
    got_items = full_scan(table, ScanFilter=scan_filter_c_and_another)
    expected_items = [item for item in items
                      if "c" in item and "another" in item
                      and item["c"] == "9" and item["another"] == "y" * 16]
    assert multiset(expected_items) == multiset(got_items)