Example #1
def test_load_children(session):
    description = stream_description(
        5, {0: 1, 1: [2, 3]},
        stream_arn="stream-arn")
    session.describe_stream.return_value = description

    # The root is built from the first shard in the description;
    # its children aren't loaded until load_children() is called
    root = Shard(stream_arn="stream-arn",
                 shard_id=description["Shards"][0]["ShardId"],
                 session=session)
    assert not root.children

    # 0 -> 1 -> 2
    #        -> 3
    # 4
    child_id = description["Shards"][1]["ShardId"]
    first_grandchild_id = description["Shards"][2]["ShardId"]
    second_grandchild_id = description["Shards"][3]["ShardId"]

    # Loading shouldn't rely on implicit ordering
    random.shuffle(description["Shards"])
    root.load_children()

    assert set(s.shard_id for s in root.children) == {child_id}
    assert root.children[0].shard_id == child_id
    grandchild_ids = [s.shard_id for s in root.children[0].children]
    assert set(grandchild_ids) == {first_grandchild_id, second_grandchild_id}

    session.describe_stream.assert_called_once_with(stream_arn="stream-arn",
                                                    first_shard=root.shard_id)
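
The stream_description helper above isn't shown in these examples; judging from how the tests index into its result (and from the DescribeStream payloads in Examples #12 and #15), it presumably builds a DescribeStream-style response from a shard count and a shape dict. A minimal, hypothetical sketch, trimmed to just the fields these tests read:

def stream_description_sketch(n, shape=None, stream_arn=None):
    # Hypothetical stand-in for the tests' stream_description helper:
    # n shards with ids "shard-id-0".."shard-id-{n-1}", parent/child links
    # taken from shape, returned in DescribeStream's response format.
    shape = shape or {}
    shards = [{"ShardId": "shard-id-{}".format(i)} for i in range(n)]
    for parent, children in shape.items():
        if isinstance(children, int):
            children = [children]
        for child in children:
            shards[child]["ParentShardId"] = shards[parent]["ShardId"]
    return {"Shards": shards, "StreamArn": stream_arn}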
Example #2
def test_apply_records(initial_sequence_number, record_count, session):
    # Temporarily ignore that an iterator should never be "latest" and also have a sequence_number.
    shard = Shard(stream_arn="stream-arn",
                  shard_id="shard-id",
                  iterator_type="initial-iterator-type",
                  sequence_number=initial_sequence_number,
                  session=session)

    records = [
        dynamodb_record_with(key=True, sequence_number=i)
        for i in range(record_count)
    ]
    response = {"Records": records, "NextShardIterator": "next-iterator-id"}
    shard._apply_get_records_response(response)
    session.get_stream_records.assert_not_called()

    if records:
        if initial_sequence_number:
            # Don't overwrite; found records but already had a sequence_number
            assert shard.iterator_type == "initial-iterator-type"
            assert shard.sequence_number == initial_sequence_number
        else:
            # Remember first sequence_number; found records and no existing sequence_number
            assert shard.iterator_type == "at_sequence"
            assert shard.sequence_number == records[0]["dynamodb"]["SequenceNumber"] == 0
        assert shard.empty_responses == 0
    else:
        # No records, no change
        assert shard.iterator_type == "initial-iterator-type"
        assert shard.sequence_number == initial_sequence_number
        assert shard.empty_responses == 1
Example #3
def test_eq_not_set_or_different(attr):
    parent = Shard(stream_arn="parent-arn", shard_id="parent-id")
    children = [
        Shard(stream_arn="child-arn", shard_id="child-id") for _ in range(2)
    ]
    kwargs = {
        "stream_arn": "stream-arn",
        "shard_id": "shard-id",
        "iterator_id": "iterator-id",
        "iterator_type": "iterator-type",
        "sequence_number": "sequence-number",
        "parent": parent
    }
    shard = Shard(**kwargs)
    other = Shard(**kwargs)
    # Initially equal
    assert shard == other
    assert other == shard

    shard.children.extend(children)
    assert not shard == other
    assert not other == shard

    # Children compare equal regardless of order
    other.children.extend(children[::-1])
    assert shard == other
    assert other == shard

    setattr(other, attr, "something else")
    assert not shard == other
    assert not other == shard
Example #4
def test_token(caplog):
    parent = Shard(stream_arn="parent-stream-arn", shard_id="parent-id")
    shard = Shard(stream_arn="stream-arn",
                  shard_id="shard-id",
                  iterator_id="iterator-id",
                  iterator_type="at_sequence",
                  sequence_number="sequence-number",
                  parent=parent)
    expected = {
        "stream_arn": "stream-arn",
        "shard_id": "shard-id",
        "iterator_type": "at_sequence",
        "sequence_number": "sequence-number",
        "parent": "parent-id"
    }
    assert shard.token == expected

    # Removing parent omits it from the token entirely
    shard.parent = None
    expected.pop("parent")
    assert shard.token == expected
    assert not caplog.records

    shard.iterator_type = "trim_horizon"
    # Building a token at a non-exact location should log a warning
    shard.token
    assert caplog.record_tuples == [
        ("bloop.stream", logging.WARNING,
         "creating shard token at non-exact location \"trim_horizon\"")
    ]
Example #5
def test_remove_shard(is_active, is_root, has_buffered, coordinator):
    shard = Shard(stream_arn=coordinator.stream_arn, shard_id="shard-id",
                  iterator_type="at_sequence", sequence_number="13")
    # other always keeps one buffered record, regardless of the parameters
    other = Shard(stream_arn=coordinator.stream_arn, shard_id="other-shard-id",
                  iterator_type="after_sequence", sequence_number="200")
    children = [Shard(stream_arn="child-arn", shard_id="child-" + str(i)) for i in range(4)]
    shard.children.extend(children)

    if is_active:
        coordinator.active.append(shard)
    if is_root:
        coordinator.roots.append(shard)
    if has_buffered:
        records = [local_record(sequence_number=str(i)) for i in range(7)]
        coordinator.buffer.push_all((r, shard) for r in records)
    coordinator.buffer.push(local_record(sequence_number="200"), other)

    coordinator.remove_shard(shard, drop_buffered_records=True)

    if is_active:
        assert all(child in coordinator.active for child in children)
    if is_root:
        assert all(child in coordinator.roots for child in children)

    # Any records that were buffered from the removed shard are gone.
    while coordinator.buffer:
        record, record_shard = coordinator.buffer.pop()
        assert record_shard is not shard
Example #6
def build_shards(n, shape=None, session=None, stream_arn=None, shard_id_prefix=""):
    """Shape describes the parent/child relationships.

    a -> b -> c -> d
           -> e -> f

    is expressed as (labeling the shards a=0 through f=5):

    build_shards(6, {0: 1, 1: [2, 4], 2: 3, 4: 5}, session=session)
    """
    # Default to flat shards, no hierarchy
    shape = shape or {}
    def shard_id(i):
        return "{}shard-id-{}".format(shard_id_prefix + "-" if shard_id_prefix else "", i)
    shards = [
        Shard(stream_arn=stream_arn, shard_id=shard_id(i), session=session)
        for i in range(n)
    ]
    for shard_index, child_indexes in shape.items():
        if isinstance(child_indexes, int):
            shards[shard_index].children.append(shards[child_indexes])
            shards[child_indexes].parent = shards[shard_index]
        else:
            for child_index in child_indexes:
                shards[shard_index].children.append(shards[child_index])
                shards[child_index].parent = shards[shard_index]

    return shards
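
To make the shape convention concrete, here is a quick usage sketch mirroring the docstring's diagram (labeling the shards a=0 through f=5); the stream_arn value is arbitrary:

# Usage sketch for build_shards, following the diagram in its docstring.
shards = build_shards(6, {0: 1, 1: [2, 4], 2: 3, 4: 5}, stream_arn="stream-arn")
a, b, c, d, e, f = shards

assert b.parent is a and a.children == [b]
assert c.parent is b and e.parent is b
assert d.parent is c and f.parent is e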
Example #7
def test_token():
    parent = Shard(stream_arn="parent-stream-arn", shard_id="parent-id")
    shard = Shard(stream_arn="stream-arn",
                  shard_id="shard-id",
                  iterator_id="iterator-id",
                  iterator_type="at_sequence",
                  sequence_number="sequence-number",
                  parent=parent)
    expected = {
        "stream_arn": "stream-arn",
        "shard_id": "shard-id",
        "iterator_type": "at_sequence",
        "sequence_number": "sequence-number",
        "parent": "parent-id"
    }
    assert shard.token == expected

    # Removing parent omits it from the token entirely
    shard.parent = None
    expected.pop("parent")
    assert shard.token == expected
Example #8
def test_token_closed_records(coordinator, session):
    """
    When a shard is closed, the last set of records is still buffered even though the shard is no longer tracked.
    The token must include the closed shard until its buffered records are consumed.

    https://github.com/numberoverzero/bloop/issues/111
    """
    closed_shard = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="closed-shard-id",
        iterator_id="closed-iter-id",
        session=session)
    coordinator.active = [closed_shard]

    session.get_stream_records.return_value = {
        "Records": [
            dynamodb_record_with(sequence_number=123, key=True),
            dynamodb_record_with(sequence_number=456, key=True),
            dynamodb_record_with(sequence_number=789, key=True)
        ]
        # last records so no NextShardIterator
    }

    # Called when the coordinator removes the exhausted shard and looks for child shards
    session.describe_stream.return_value = {
        "Shards": [],
        "StreamArn": coordinator.stream_arn
    }

    initial_token = coordinator.token
    assert initial_token == {
        "stream_arn": "stream-arn",
        "active": ["closed-shard-id"],
        "shards": []
    }

    record = next(coordinator)
    assert record["meta"]["sequence_number"] == "123"
    assert coordinator.closed[closed_shard] == len(coordinator.buffer) == 2

    # The token should still include the shard in "active", and the "shards"
    # list should point just past sequence number 123, the last record consumed
    token = coordinator.token
    assert token == {
        "stream_arn": "stream-arn",
        "active": ["closed-shard-id"],
        "shards": [{
            "iterator_type": "after_sequence",
            "sequence_number": "123",
            "shard_id": "closed-shard-id",
        }]
    }
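
For context, the token exists so a consumer can persist its position and resume later without losing the records that are still buffered from a closed shard. A hedged sketch of that round trip using bloop's public Engine.stream API; engine, MyModel, save_token, and load_token are placeholders:

# Sketch: persist the stream token and resume from it in a later run.
stream = engine.stream(MyModel, "trim_horizon")
record = next(stream)           # may be None when there's nothing to read
save_token(stream.token)        # token still references closed-but-buffered shards

# ...later, possibly in a new process...
stream = engine.stream(MyModel, load_token())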
Example #9
def test_move_to_old_token(coordinator, shard, session):
    """Can't rebuild from a token with shards that have no connection to the current generation"""
    root = Shard(stream_arn=coordinator.stream_arn, shard_id="parent-shard")
    shard.parent = root
    root.children.append(shard)
    coordinator.active.append(shard)
    coordinator.roots.append(root)
    token = coordinator.token

    # There is no lineage that connects the shard_id from the token to the shards in the stream description.
    session.describe_stream.return_value = stream_description(1)

    with pytest.raises(InvalidStream):
        coordinator.move_to(token)
Example #10
def test_heartbeat_until_sequence_number(coordinator, session):
    """After heartbeat() finds records for a shard, the shard doesn't check during the next heartbeat."""
    shard = Shard(stream_arn=coordinator.stream_arn, shard_id="shard-id", session=session,
                  iterator_id="iterator-id", iterator_type="latest")
    coordinator.active.append(shard)

    session.get_stream_records.side_effect = build_get_records_responses(1, 0)

    # First call fetches records from DynamoDB
    coordinator.heartbeat()
    assert coordinator.buffer
    assert shard.sequence_number is not None
    session.get_stream_records.assert_called_once_with("iterator-id")

    # Second call skips the shard, since it now has a sequence_number.
    coordinator.heartbeat()
    assert session.get_stream_records.call_count == 1
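
For context, heartbeat() exists because shards positioned at "latest" or "trim_horizon" have no sequence_number to recover from if their iterator expires (DynamoDB Streams shard iterators expire after roughly 15 minutes). A hedged usage sketch with bloop's Stream API; engine, MyModel, process_record, and the sleep interval are placeholders:

import time

# Sketch: poll the stream and keep not-yet-pinned iterators alive.
stream = engine.stream(MyModel, "latest")
while True:
    record = next(stream)
    if record is None:
        # Nothing new; refresh shards that don't have a sequence_number yet
        stream.heartbeat()
        time.sleep(5)
    else:
        process_record(record)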
Example #11
def test_move_to_old_token(coordinator, shard, session, caplog):
    """Can't rebuild from a token with shards that have no connection to the current generation"""
    root = Shard(stream_arn=coordinator.stream_arn, shard_id="parent-shard")
    shard.parent = root
    root.children.append(shard)
    coordinator.active.append(shard)
    coordinator.roots.append(root)
    token = coordinator.token

    # There is no lineage that connects the shard_id from the token to the shards in the stream description.
    session.describe_stream.return_value = stream_description(1)

    with pytest.raises(InvalidStream):
        coordinator.move_to(token)

    assert caplog.record_tuples == [
        ("bloop.stream", logging.INFO, "Unknown or expired shard \"parent-shard\" - pruning from stream token"),
        ("bloop.stream", logging.INFO, "Unknown or expired shard \"shard-id\" - pruning from stream token"),
    ]
Example #12
def test_buffer_closed_records(coordinator, session):
    """
    When a shard is closed, the last set of records is still buffered even though the shard is no longer tracked.
    https://github.com/numberoverzero/bloop/issues/111
    """
    closed_shard = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="closed-shard-id",
        iterator_id="closed-iter-id",
        session=session)
    coordinator.active = [closed_shard]

    session.get_stream_records.return_value = {
        "Records": [
            dynamodb_record_with(sequence_number=123, key=True),
            dynamodb_record_with(sequence_number=456, key=True),
            dynamodb_record_with(sequence_number=789, key=True)
        ]
        # last records so no NextShardIterator
    }

    # Called when the coordinator removes the exhausted shard and looks for child shards
    session.describe_stream.return_value = {
        "Shards": [],
        "StreamArn": coordinator.stream_arn
    }

    assert not coordinator.closed

    record = next(coordinator)
    assert not coordinator.active
    assert record["meta"]["sequence_number"] == "123"
    assert len(coordinator.buffer) == coordinator.closed[closed_shard] == 2

    record = next(coordinator)
    assert record["meta"]["sequence_number"] == "456"
    assert len(coordinator.buffer) == coordinator.closed[closed_shard] == 1

    record = next(coordinator)
    assert record["meta"]["sequence_number"] == "789"
    assert not coordinator.buffer
    assert not coordinator.closed
Example #13
def test_repr(expected, kwargs):
    shard = Shard(stream_arn="stream-arn", shard_id="shard-id", **kwargs)
    assert repr(shard) == expected
Example #14
def shard(session, stream_arn, shard_id):
    return Shard(stream_arn=stream_arn, shard_id=shard_id, session=session)
Example #15
def test_advance_removes_exhausted(has_children, loads_children, coordinator, shard, session):
    """Exhausted shards are removed; any children are promoted, and reset to trim_horizon"""
    shard.iterator_id = last_iterator
    shard.iterator_type = "latest"

    coordinator.active.append(shard)

    if has_children:
        # Already loaded, doesn't need to call DescribeStream
        child = Shard(
            stream_arn=coordinator.stream_arn, shard_id="child-id", parent=shard,
            iterator_type="at_sequence", sequence_number="sequence-number",
            session=session)
        shard.children.append(child)
    elif loads_children:
        # Child exists, but isn't known locally
        session.describe_stream.return_value = {
            "Shards": [{
                "SequenceNumberRange": {
                    "EndingSequenceNumber": "820400000000000001192334",
                    "StartingSequenceNumber": "820400000000000001192334"
                },
                "ShardId": "child-id",
                "ParentShardId": "shard-id"
            }],
            "StreamArn": coordinator.stream_arn
        }
    else:
        # No children
        session.describe_stream.return_value = {
            "Shards": [],
            "StreamArn": coordinator.stream_arn
        }

    coordinator.advance_shards()

    # No records found
    assert not coordinator.buffer
    # No longer active
    assert shard not in coordinator.active

    if has_children:
        # Children are already loaded, no need to DescribeStream
        session.describe_stream.assert_not_called()
    else:
        # No children locally, DescribeStream tried to find some
        session.describe_stream.assert_called_once_with(
            stream_arn=coordinator.stream_arn,
            first_shard=shard.shard_id)

    # Children (pre-existing or found in DescribeStream) are active
    if has_children or loads_children:
        assert len(coordinator.active) == 1
        assert coordinator.active[0].parent is shard

        # Part of promoting the child is resetting it to trim_horizon
        session.get_shard_iterator.assert_called_once_with(
            stream_arn=coordinator.stream_arn,
            shard_id="child-id",
            iterator_type="trim_horizon",
            sequence_number=None
        )
    else:
        # Without a child, there's no need to get a new iterator
        session.get_shard_iterator.assert_not_called()