Python full_scan Examples, util.full_scan Python Examples

Example #1

0

Show file

def test_scan_sort_order_string(filled_test_table):
    """Check that a full scan returns the "long" partition sorted by sort key.

    The fixture supplies a table together with the list of items that were
    written to it. Only the items whose partition key 'p' equals 'long' are
    inspected: their sort keys 'c' must come back from the scan already in
    string order, and must be exactly the expected sort keys.
    """
    table, expected = filled_test_table
    scanned = full_scan(table)
    assert len(expected) == len(scanned)

    def long_sort_keys(rows):
        # Sort-key ("c") values of the rows in partition "long", keeping
        # the order in which the rows were given.
        return [row['c'] for row in rows if row['p'] == 'long']

    expected_keys = long_sort_keys(expected)
    scanned_keys = long_sort_keys(scanned)
    # The scan itself must deliver the sort keys in sorted (string) order...
    assert sorted(scanned_keys) == scanned_keys
    # ...and those keys must be a sorted version of the expected ones.
    assert sorted(expected_keys) == scanned_keys

Example #2

0

Show file

def test_scan_filter_expression(filled_test_table):
    """Check Scan with a FilterExpression built from boto3 ``Attr`` conditions.

    Two filters are tried: a single equality condition, and the conjunction
    of two equality conditions. In both cases the scan result is compared
    (as a multiset) with the same filter applied in Python to the items the
    fixture reports as written.
    """
    table, all_items = filled_test_table

    # One condition: attribute == "xxxx"
    condition = Attr("attribute").eq("xxxx")
    scanned = full_scan(table, FilterExpression=condition)
    print(scanned)
    expected = [row for row in all_items if row.get('attribute') == 'xxxx']
    assert multiset(expected) == multiset(scanned)

    # Two conditions joined with "&": attribute == "xxxx" and another == "yy"
    condition = Attr("attribute").eq("xxxx") & Attr("another").eq("yy")
    scanned = full_scan(table, FilterExpression=condition)
    print(scanned)
    expected = [
        row for row in all_items
        if row.get('attribute') == 'xxxx' and row.get('another') == 'yy'
    ]
    assert multiset(expected) == multiset(scanned)

Example #3

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_missing_attribute_3(test_table_gsi_3):
    """Check that items lacking "a" and/or "b" stay out of the index 'hello'.

    The same key 'p' is written three times: with only "b", with only "a",
    and with neither. Each time the item must be readable from the base
    table but must not show up in a scan of the index.

    Note: index reads are eventually consistent, so we cannot really prove
    that an item will "never" appear in the index. The hope is that, if a
    bug made such an item appear, the delay here is sometimes long enough
    for it to become visible.
    """
    p = random_string()
    a = random_string()
    b = random_string()
    # Attributes stored next to the key in each round: missing "a", then
    # missing "b", then missing both.
    for extra in ({'b': b}, {'a': a}, {}):
        test_table_gsi_3.put_item(Item={'p': p, **extra})
        stored = test_table_gsi_3.get_item(Key={'p': p})['Item']
        assert stored == {'p': p, **extra}
        indexed_keys = [row['p'] for row in full_scan(test_table_gsi_3, IndexName='hello')]
        assert p not in indexed_keys

Example #4

0

Show file

File: test_scan.py Project: yaronkaikov/scylla

def test_scan_parallel(filled_test_table):
    """Check that a segmented (parallel) scan covers each item exactly once.

    For several values of TotalSegments, every segment is scanned in turn
    and the results are concatenated. The concatenation must equal, as a
    multiset, the items the fixture reports as written.
    """
    table, expected = filled_test_table
    for total in [1, 2, 17]:
        print('Testing TotalSegments={}'.format(total))
        collected = [
            row
            for seg in range(total)
            for row in full_scan(table, TotalSegments=total, Segment=seg)
        ]
        # Multiset equality means each expected item was returned by one -
        # and just one - of the segments.
        assert multiset(expected) == multiset(collected)

Example #5

0

Show file

def test_scan_attributes_to_get(dynamodb, filled_test_table):
    """Check Scan with AttributesToGet for several attribute selections.

    For each selection, the scan result must equal (as a multiset) the
    written items reduced to the requested attributes; an attribute that an
    item does not have is simply absent from that item's projection.
    """
    table, items = filled_test_table
    selections = [
        ['another'],  # only non-key attributes (one item doesn't have it!)
        ['c', 'another'],  # a key attribute (sort key) and non-key
        ['p', 'c'],  # entire key
        ['nonexistent']  # none of the items have this attribute!
    ]
    for wanted in selections:
        print(wanted)
        scanned = full_scan(table, AttributesToGet=wanted)
        projected = []
        for row in items:
            projected.append({name: row[name] for name in wanted if name in row})
        assert multiset(projected) == multiset(scanned)

Example #6

0

Show file

File: test_tracing.py Project: yangly0815/scylla

def test_tracing_all(with_tracing, test_table_s_isolation_always, dynamodb):
    """Issue one request of each kind and check that each one was traced.

    Every request carries its own long random string, which is later used
    to locate the request's tracing session. All requests are sent first;
    the trace checks run afterwards, in the same order.
    """
    table = test_table_s_isolation_always
    client = table.meta.client
    # Pairs of (random marker used in the request, events expected in its trace).
    # NOTE: the expected events are fairly arbitrary, and may break in the
    # future if we change the tracing messages...
    checks = []

    # PutItem:
    marker = random_string(20)
    table.put_item(Item={'p': marker})
    checks.append((marker, ['PutItem', 'CAS successful']))

    # GetItem:
    marker = random_string(20)
    table.get_item(Key={'p': marker})
    checks.append((marker, ['GetItem', 'Querying is done']))

    # DeleteItem:
    marker = random_string(20)
    table.delete_item(Key={'p': marker})
    checks.append((marker, ['DeleteItem', 'CAS successful']))

    # UpdateItem:
    marker = random_string(20)
    table.update_item(Key={'p': marker}, AttributeUpdates={})
    checks.append((marker, ['UpdateItem', 'CAS successful']))

    # BatchGetItem:
    marker = random_string(20)
    client.batch_get_item(RequestItems={table.name: {'Keys': [{'p': marker}]}})
    checks.append((marker, ['BatchGetItem', 'Querying is done']))

    # BatchWriteItem:
    marker = random_string(20)
    client.batch_write_item(
        RequestItems={table.name: [{'PutRequest': {'Item': {'p': marker}}}]})
    checks.append((marker, ['BatchWriteItem', 'CAS successful']))

    # Query:
    marker = random_string(20)
    full_query(table, KeyConditionExpression='p = :p',
               ExpressionAttributeValues={':p': marker})
    checks.append((marker, ['Query', 'Querying is done']))

    # Scan:
    marker = random_string(20)
    full_scan(table, FilterExpression='p = :p',
              ExpressionAttributeValues={':p': marker})
    checks.append((marker, ['Scan', 'Performing a database query']))

    # Only now, with every request sent, look at the traces.
    for marker, events in checks:
        expect_tracing_events(dynamodb, marker, events)

Example #7

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_missing_attribute(test_table_gsi_2):
    """Check that an item lacking the indexed attribute "x" is not indexed.

    Two items are written: one with "x" and one without. Both must be
    readable from the base table, but only the first may be visible
    through the index 'hello'.
    """
    with_x = {'p': random_string(), 'x': random_string()}
    p1, x1 = with_x['p'], with_x['x']
    test_table_gsi_2.put_item(Item={'p': p1, 'x': x1})
    p2 = random_string()
    test_table_gsi_2.put_item(Item={'p': p2})

    # The base table holds both items.
    first = test_table_gsi_2.get_item(Key={'p': p1})['Item']
    assert first == {'p': p1, 'x': x1}
    second = test_table_gsi_2.get_item(Key={'p': p2})['Item']
    assert second == {'p': p2}

    # The first item is reachable with a Query on the index, and a scan of
    # the base table returns it as well.
    x_equals_x1 = {'x': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}}
    assert_index_query(test_table_gsi_2, 'hello', [{'p': p1, 'x': x1}],
                       KeyConditions=x_equals_x1)
    base_keys = [row['p'] for row in full_scan(test_table_gsi_2)]
    assert p1 in base_keys

    # The second item must not be in the index. With eventually consistent
    # reads we cannot be sure an item will "never" appear there, so this
    # check runs last: if a bug did index the item, hopefully enough time
    # has passed for it to become visible. At least sometimes.
    index_keys = [row['p'] for row in full_scan(test_table_gsi_2, IndexName='hello')]
    assert p2 not in index_keys

Example #8

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_identical(dynamodb):
    """Verify a GSI whose key schema is identical to the base table's.

    A dedicated table is created with hash key 'p' and an index 'hello'
    that is also keyed on 'p' and projects ALL attributes. Ten random items
    are written, and scanning the base table and scanning the index must
    return the same items. Scanning a non-existent index must be rejected
    with a ValidationException.
    """
    table = create_test_table(dynamodb,
        KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }],
        AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }],
        GlobalSecondaryIndexes=[
            {   'IndexName': 'hello',
                'KeySchema': [{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
                'Projection': { 'ProjectionType': 'ALL' }
            }
        ])
    # The table exists only for this test, so delete it even when one of the
    # checks below fails - otherwise a failing run leaves the table behind.
    try:
        items = [{'p': random_string(), 'x': random_string()} for i in range(10)]
        with table.batch_writer() as batch:
            for item in items:
                batch.put_item(item)
        # Scanning the entire table directly or via the index yields the same
        # results (in different order).
        assert multiset(items) == multiset(full_scan(table))
        assert_index_scan(table, 'hello', items)
        # We can't scan a non-existent index
        with pytest.raises(ClientError, match='ValidationException'):
            full_scan(table, IndexName='wrong')
    finally:
        table.delete()

Example #9

0

Show file

File: test_projection_expression.py Project: yfxu1990/scylla

def test_scan_projection_expression_path(test_table):
    """Check a Scan whose ProjectionExpression contains a nested path (a.x).

    Ten items are written under one random partition key. Because a scan
    also returns unrelated partitions created by other tests (hopefully not
    too many...), "p" is requested as well so that the result can be
    narrowed down to this test's own partition.
    """
    p = random_string()
    items = []
    for i in range(10):
        items.append({'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello'})
    with test_table.batch_writer() as writer:
        for row in items:
            writer.put_item(row)

    scanned = full_scan(test_table, ProjectionExpression="p, a.x")
    mine = [row for row in scanned if row['p'] == p]
    # Only "p" and the nested member "x" of "a" are expected to survive.
    projected = [{'p': p, 'a': {'x': row['a']['x']}} for row in items]
    assert multiset(projected) == multiset(mine)

Example #10

0

Show file

def test_projection_expression_and_attributes_to_get(test_table_s):
    """Check that ProjectionExpression and AttributesToGet cannot be combined.

    GetItem, Scan and Query are each called with both parameters; every
    call is expected to fail with a ClientError whose message matches
    'ValidationException.*both'.
    """
    both_error = 'ValidationException.*both'
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})

    # GetItem
    with pytest.raises(ClientError, match=both_error):
        test_table_s.get_item(
            Key={'p': p},
            ConsistentRead=True,
            ProjectionExpression='a',
            AttributesToGet=['b'],
        )['Item']

    # Scan
    with pytest.raises(ClientError, match=both_error):
        full_scan(test_table_s, ProjectionExpression='a', AttributesToGet=['a'])

    # Query
    with pytest.raises(ClientError, match=both_error):
        p_equals = {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}
        full_query(test_table_s,
                   KeyConditions=p_equals,
                   ProjectionExpression='a',
                   AttributesToGet=['a'])

Example #11

0

Show file

File: test_tracing.py Project: yangly0815/scylla

def find_tracing_session(dynamodb, str):
    """Return the session_id of the tracing session whose parameters contain `str`.

    The tracing session table does not get updated immediately, so the
    table is re-scanned roughly every 0.3 seconds, for up to 30 seconds,
    until an entry whose 'parameters' contains `str` shows up. If none is
    found in that time, the test is failed via pytest.fail().

    The most recent scan is kept in the module-level `last_scan`, so when
    looking for sessions of several requests that were started together it
    might be enough to read the session table just once, without re-reading
    (and of course, without sleeping) for the other requests.
    """
    # NOTE(review): the parameter name `str` shadows the builtin inside this
    # function; it is kept so that existing callers are unaffected.
    global last_scan
    trace_sessions_table = dynamodb.Table('.scylla.alternator.system_traces.sessions')
    start = time.time()
    if last_scan is None:
        # The trace tables have RF=2, even on a one-node test setup, and
        # thus fail reads with ConsistentRead=True (Quorum)...
        last_scan = full_scan(trace_sessions_table, ConsistentRead=False)
    while time.time() - start < 30:
        for entry in last_scan:
            if str in entry['parameters']:
                print(f'find_tracing_session time {time.time()-start}')
                return entry['session_id']
        # Not there yet: wait a little and refresh the cached scan.
        time.sleep(0.3)
        last_scan = full_scan(trace_sessions_table, ConsistentRead=False)
    pytest.fail("Couldn't find tracing session")

Example #12

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_simple(test_table_gsi_1):
    """Check querying and scanning through the index 'hello' keyed on "c".

    Ten random items are written. A query on just "c" must be rejected on
    the base table but must work through the index, and scanning the index
    must return the same items as scanning the base table.
    """
    items = []
    for _ in range(10):
        items.append({'p': random_string(), 'c': random_string(), 'x': random_string()})
    with test_table_gsi_1.batch_writer() as writer:
        for row in items:
            writer.put_item(row)
    c = items[0]['c']

    def c_equals():
        # A fresh KeyConditions dict asking for c == <first item's c>.
        return {'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}}

    # Querying on the sort key alone isn't allowed on the base table...
    with pytest.raises(ClientError, match='ValidationException'):
        full_query(test_table_gsi_1, KeyConditions=c_equals())
    # ...but the index allows exactly that.
    matching = [row for row in items if row['c'] == c]
    assert_index_query(test_table_gsi_1, 'hello', matching,
                       KeyConditions=c_equals())
    # A scan of the whole table, directly or via the index, yields the same
    # results (in different order).
    base_items = full_scan(test_table_gsi_1)
    assert_index_scan(test_table_gsi_1, 'hello', base_items)

Example #13

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_key_not_in_index(test_table_gsi_1_hash_only):
    """Check index queries on "c" for items with distinct and with shared "c".

    First ten items with different "c" values are written and one of them
    is looked up through the index 'hello'. Then ten items sharing a single
    "c" value (but with different hash keys "p") are written, and all of
    them must be returned by an index query on that value. Finally, an
    index scan must match a scan of the base table.
    """
    table = test_table_gsi_1_hash_only

    def c_equals(value):
        # KeyConditions dict asking for c == value.
        return {'c': {'AttributeValueList': [value], 'ComparisonOperator': 'EQ'}}

    def write_all(rows):
        with table.batch_writer() as writer:
            for row in rows:
                writer.put_item(row)

    # Items with different 'c' values:
    items = []
    for _ in range(10):
        items.append({'p': random_string(), 'c': random_string(), 'x': random_string()})
    write_all(items)
    c = items[0]['c']
    matching = [row for row in items if row['c'] == c]
    assert_index_query(table, 'hello', matching, KeyConditions=c_equals(c))

    # Items with the same sort key 'c' but different hash key 'p':
    c = random_string()
    items = []
    for _ in range(10):
        items.append({'p': random_string(), 'c': c, 'x': random_string()})
    write_all(items)
    assert_index_query(table, 'hello', items, KeyConditions=c_equals(c))

    # A scan of the whole table, directly or via the index, yields the same
    # results (in different order).
    base_items = full_scan(table)
    assert_index_scan(table, 'hello', base_items)

Example #14

0

Show file

File: test_tracing.py Project: yaronkaikov/scylla

def test_slow_query_log(with_slow_query_logging, test_table_s, dynamodb):
    """Check that a PutItem and a DeleteItem appear in the slow-query log.

    Each operation taking more than 0 microseconds is logged, which
    effectively logs all requests as slow. The slow-log table is polled
    every half second, for up to 60 seconds, until both operations on the
    random key are found with username "alternator"; otherwise the test is
    failed.
    """
    p = random_string(20)
    print(f"Traced key: {p}")
    test_table_s.put_item(Item={'p': p})
    test_table_s.delete_item(Key={'p': p})

    slow_log = dynamodb.Table('.scylla.alternator.system_traces.node_slow_log')

    def logged(operation, entries):
        # True if some entry records `operation` on key p by "alternator".
        return any(operation in entry['parameters'] and p in entry['parameters']
                   and entry['username'] == "alternator" for entry in entries)

    began = time.time()
    while time.time() < began + 60:
        entries = full_scan(slow_log, ConsistentRead=False)
        saw_put = logged("PutItem", entries)
        saw_delete = logged("DeleteItem", entries)
        if saw_put and saw_delete:
            return
        time.sleep(0.5)
    pytest.fail("Slow query entries not found")

Example #15

0

Show file

def assert_index_scan(table, index_name, expected_items, **kwargs):
    """Assert that scanning index `index_name` of `table` yields `expected_items`.

    The comparison is done on multisets, so the order of the items does not
    matter. Any extra keyword arguments are passed through to full_scan().
    """
    wanted = multiset(expected_items)
    scanned = multiset(full_scan(table, IndexName=index_name, **kwargs))
    assert wanted == scanned

Example #16

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_strong_consistency(test_table_gsi_1):
    """Check that ConsistentRead=True is rejected when reading the index 'hello'.

    Both a Query and a Scan of the index are expected to fail with a
    ClientError matching 'ValidationException.*Consistent'.
    """
    consistent_error = 'ValidationException.*Consistent'
    # Query on the index
    with pytest.raises(ClientError, match=consistent_error):
        c_is_hi = {'c': {'AttributeValueList': ['hi'], 'ComparisonOperator': 'EQ'}}
        full_query(test_table_gsi_1,
                   KeyConditions=c_is_hi,
                   IndexName='hello',
                   ConsistentRead=True)
    # Scan of the index
    with pytest.raises(ClientError, match=consistent_error):
        full_scan(test_table_gsi_1, IndexName='hello', ConsistentRead=True)

Example #17

0

Show file

File: test_gsi.py Project: ukayani/scylla

def test_gsi_missing_index(test_table_gsi_1):
    """Check that reading a non-existent index fails and names the index.

    Both a Query and a Scan using IndexName='wrong_name' are expected to
    fail with a ClientError matching 'ValidationException.*wrong_name'.
    """
    missing_error = 'ValidationException.*wrong_name'
    # Query
    with pytest.raises(ClientError, match=missing_error):
        x_is_1 = {'x': {'AttributeValueList': [1], 'ComparisonOperator': 'EQ'}}
        full_query(test_table_gsi_1, IndexName='wrong_name', KeyConditions=x_is_1)
    # Scan
    with pytest.raises(ClientError, match=missing_error):
        full_scan(test_table_gsi_1, IndexName='wrong_name')

Example #18

0

Show file

File: test_filter_expression.py Project: yangly0815/scylla

def test_filter_expression_scan_partition_key(filled_test_table):
    """Check a Scan whose FilterExpression tests the partition key "p".

    The scan result must equal (as a multiset) the written items whose "p"
    is '3'.
    """
    table, items = filled_test_table
    wanted_p = '3'
    scanned = full_scan(table,
                        FilterExpression='p=:a',
                        ExpressionAttributeValues={':a': wanted_p})
    matching = [row for row in items if row['p'] == wanted_p]
    assert multiset(matching) == multiset(scanned)

Example #19

0

Show file

def test_scan_parallel_with_exclusive_start_key(filled_test_table):
    """Check the error for a segmented scan given this ExclusiveStartKey.

    The scan uses TotalSegments=1000000, Segment=0 and the start key
    {'p': '0', 'c': '0'}, and is expected to fail with a ClientError
    matching 'ValidationException.*Exclusive'.
    """
    # Only the table is needed; the fixture's item list is not used here.
    table, _ = filled_test_table
    start_key = {'p': '0', 'c': '0'}
    with pytest.raises(ClientError, match='ValidationException.*Exclusive'):
        full_scan(table,
                  TotalSegments=1000000,
                  Segment=0,
                  ExclusiveStartKey=start_key)

Example #20

0

Show file

def test_scan_select(filled_test_table):
    """Exercise the "Select" parameter of Scan on a base table.

    Covers the default (no Select), ALL_ATTRIBUTES, ALL_PROJECTED_ATTRIBUTES
    (expected to be rejected on a base table), SPECIFIC_ATTRIBUTES with and
    without an attribute list, COUNT, and an unknown value. The fixture
    supplies the table and the list of items written to it.
    """
    test_table, items = filled_test_table
    # By default, a scan returns all the items, with all their attributes:
    got_items = full_scan(test_table)
    assert multiset(items) == multiset(got_items)
    # Select=ALL_ATTRIBUTES does exactly the same as the default - return
    # all attributes:
    got_items = full_scan(test_table, Select='ALL_ATTRIBUTES')
    assert multiset(items) == multiset(got_items)
    # Select=ALL_PROJECTED_ATTRIBUTES is not allowed on a base table (it
    # is just for indexes, when IndexName is specified)
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(test_table, Select='ALL_PROJECTED_ATTRIBUTES')
    # Select=SPECIFIC_ATTRIBUTES requires that either an AttributesToGet
    # or a ProjectionExpression appears, but then really does nothing beyond
    # what AttributesToGet and ProjectionExpression already do:
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(test_table, Select='SPECIFIC_ATTRIBUTES')
    wanted = ['c', 'another']
    got_items = full_scan(test_table, Select='SPECIFIC_ATTRIBUTES', AttributesToGet=wanted)
    expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
    assert multiset(expected_items) == multiset(got_items)
    got_items = full_scan(test_table, Select='SPECIFIC_ATTRIBUTES', ProjectionExpression=','.join(wanted))
    assert multiset(expected_items) == multiset(got_items)
    # Select=COUNT just returns a count - not any items
    (got_count, got_items) = full_scan_and_count(test_table, Select='COUNT')
    assert got_count == len(items)
    assert got_items == []
    # Check that we also get a count in regular scans - not just with
    # Select=COUNT; without Select=COUNT we get both the items and the count:
    (got_count, got_items) = full_scan_and_count(test_table)
    assert got_count == len(items)
    assert multiset(items) == multiset(got_items)
    # Select with some unknown string generates a validation exception:
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(test_table, Select='UNKNOWN')

Example #21

0

Show file

def test_scan_with_key_equality_filtering(dynamodb, filled_test_table):
    """Check Scan with a ScanFilter using EQ conditions on key attributes.

    Four filters are tried: on the hash key "p" alone, on the sort key "c"
    alone, on "p" together with the non-key "attribute", and on "c" together
    with the non-key "another". For each one the scan result must equal (as
    a multiset) the written items that satisfy every condition of that
    filter. The expectations assume that multiple ScanFilter conditions are
    combined with AND.
    """
    table, items = filled_test_table
    scan_filter_p = {
        "p": {
            "AttributeValueList": ["7"],
            "ComparisonOperator": "EQ"
        }
    }
    scan_filter_c = {
        "c": {
            "AttributeValueList": ["9"],
            "ComparisonOperator": "EQ"
        }
    }
    scan_filter_p_and_attribute = {
        "p": {
            "AttributeValueList": ["7"],
            "ComparisonOperator": "EQ"
        },
        "attribute": {
            "AttributeValueList": ["x" * 7],
            "ComparisonOperator": "EQ"
        }
    }
    scan_filter_c_and_another = {
        "c": {
            "AttributeValueList": ["9"],
            "ComparisonOperator": "EQ"
        },
        "another": {
            "AttributeValueList": ["y" * 16],
            "ComparisonOperator": "EQ"
        }
    }

    # Filtering on the hash key
    got_items = full_scan(table, ScanFilter=scan_filter_p)
    expected_items = [item for item in items if item.get("p") == "7"]
    assert multiset(expected_items) == multiset(got_items)

    # Filtering on the sort key
    got_items = full_scan(table, ScanFilter=scan_filter_c)
    expected_items = [item for item in items if item.get("c") == "9"]
    assert multiset(expected_items) == multiset(got_items)

    # Filtering on the hash key and an attribute. The expectation has to
    # mirror scan_filter_p_and_attribute, i.e. test "attribute" == "x" * 7
    # (and not the unrelated "another" attribute).
    got_items = full_scan(table, ScanFilter=scan_filter_p_and_attribute)
    expected_items = [
        item for item in items
        if item.get("p") == "7" and item.get("attribute") == "x" * 7
    ]
    assert multiset(expected_items) == multiset(got_items)

    # Filtering on the sort key and an attribute
    got_items = full_scan(table, ScanFilter=scan_filter_c_and_another)
    expected_items = [
        item for item in items
        if item.get("c") == "9" and item.get("another") == "y" * 16
    ]
    assert multiset(expected_items) == multiset(got_items)