Example #1
async def fixture_registry_async(
    tmp_path: Path, kafka_server: Optional[KafkaConfig]
) -> AsyncIterator[KarapaceSchemaRegistry]:
    if not kafka_server:
        assert REGISTRY_URI in os.environ or REST_URI in os.environ
        instance, _ = mock_factory("registry")()
        yield instance
    else:
        config_path = tmp_path / "karapace_config.json"
        kafka_port = kafka_server.kafka_port

        config = set_config_defaults({
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": new_random_name(),
            "group_id": new_random_name("schema_registry"),
        })
        write_config(config_path, config)
        registry = KarapaceSchemaRegistry(
            config_file_path=str(config_path),
            config=set_config_defaults(config),
        )
        await registry.get_master()
        try:
            yield registry
        finally:
            registry.close()
Example #2
def test_master_selection(kafka_servers: KafkaServers, strategy: str) -> None:
    # Use random port to allow for parallel runs.
    port1 = get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[])
    port2 = get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[port1])
    port_aa, port_bb = sorted((port1, port2))
    client_id_aa = new_random_name("master_selection_aa_")
    client_id_bb = new_random_name("master_selection_bb_")
    group_id = new_random_name("group_id")

    config_aa = set_config_defaults({
        "advertised_hostname": "127.0.0.1",
        "bootstrap_uri": kafka_servers.bootstrap_servers,
        "client_id": client_id_aa,
        "group_id": group_id,
        "port": port_aa,
        "master_election_strategy": strategy,
    })
    config_bb = set_config_defaults({
        "advertised_hostname": "127.0.0.1",
        "bootstrap_uri": kafka_servers.bootstrap_servers,
        "client_id": client_id_bb,
        "group_id": group_id,
        "port": port_bb,
        "master_election_strategy": strategy,
    })

    with closing(init_admin(config_aa)) as mc_aa, closing(init_admin(config_bb)) as mc_bb:
        if strategy == "lowest":
            master = mc_aa
            slave = mc_bb
        else:
            master = mc_bb
            slave = mc_aa

        # Wait for the election to happen
        while not is_master(master):
            time.sleep(0.3)

        while not has_master(slave):
            time.sleep(0.3)

        # Make sure the end configuration is as expected
        master_url = f'http://{master.config["host"]}:{master.config["port"]}'
        assert master.sc.election_strategy == strategy
        assert slave.sc.election_strategy == strategy
        assert master.sc.master_url == master_url
        assert slave.sc.master_url == master_url
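
Note: every example on this page relies on new_random_name to generate collision-free topic, group, and subject names for parallel test runs. The helper itself is not shown in the listing; the following is a minimal sketch, assuming it merely appends a random hex suffix to the given prefix (the default empty prefix is an assumption, inferred from the bare new_random_name() calls above):

import uuid

def new_random_name(prefix: str = "") -> str:
    # Assumed implementation: a short random suffix avoids name
    # collisions between concurrent test runs.
    return f"{prefix}{uuid.uuid4().hex[:8]}"
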
Example #3
def fixture_registry_async_pair(tmp_path: Path, kafka_server: Optional[KafkaConfig]):
    assert kafka_server, f"registry_async_pair cannot be used if the env variable `{REGISTRY_URI}` or `{REST_URI}` is set"

    master_config_path = tmp_path / "karapace_config_master.json"
    slave_config_path = tmp_path / "karapace_config_slave.json"
    master_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[])
    slave_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[master_port])
    kafka_port = kafka_server.kafka_port
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")
    write_config(
        master_config_path, {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        })
    write_config(
        slave_config_path, {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        })
    master_process = Popen(["python", "-m", "karapace.karapace_all", str(master_config_path)])
    slave_process = Popen(["python", "-m", "karapace.karapace_all", str(slave_config_path)])
    try:
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()
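
The fixture above blocks on wait_for_port until both registry processes accept connections. That helper is imported from the test utilities and is not part of this listing; a minimal sketch, assuming it simply retries a TCP connect until a deadline:

import socket
import time

def wait_for_port(port: int, *, hostname: str = "127.0.0.1", wait_time: float = 20.0) -> None:
    # Assumed behaviour: poll until a TCP connection succeeds, or fail loudly.
    deadline = time.monotonic() + wait_time
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((hostname, port), timeout=1.0):
                return
        except OSError:
            time.sleep(0.5)
    raise TimeoutError(f"port {port} on {hostname} did not open within {wait_time}s")
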
Example #4
async def insert_data(c):
    subject = new_random_name("subject")
    res = await c.post(
        "subjects/{}/versions".format(subject),
        json={"schema": '{"type": "string"}'},
    )
    assert res.status == 200
    assert "id" in res.json()
    return subject
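
A hypothetical caller, to show how the fixture-provided client and insert_data compose (the test name and the version-list assertion are illustrative, not from the source):

async def test_subject_roundtrip(registry_async_client):
    # insert_data registers one schema version under a fresh random subject.
    subject = await insert_data(registry_async_client)
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1]
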
Example #5
async def fixture_registry_async(session_tmpdir, kafka_server):
    if REGISTRY_URI in os.environ or REST_URI in os.environ:
        instance, _ = mock_factory("registry")()
        yield instance
    else:
        config_path = os.path.join(session_tmpdir(), "karapace_config.json")
        kafka_port = kafka_server["kafka_port"]
        write_config(
            config_path, {
                "log_level": "WARNING",
                "bootstrap_uri": f"127.0.0.1:{kafka_port}",
                "topic_name": new_random_name(),
                "group_id": new_random_name("schema_registry")
            })
        registry = KarapaceSchemaRegistry(config_path)
        await registry.get_master()
        try:
            yield registry
        finally:
            registry.close()
Example #6
def test_no_eligible_master(kafka_servers: KafkaServers) -> None:
    client_id = new_random_name("master_selection_")
    group_id = new_random_name("group_id")

    config_aa = set_config_defaults({
        "advertised_hostname": "127.0.0.1",
        "bootstrap_uri": kafka_servers.bootstrap_servers,
        "client_id": client_id,
        "group_id": group_id,
        "port": get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[]),
        "master_eligibility": False,
    })

    with closing(init_admin(config_aa)) as mc:
        # Wait for the election to happen, i.e. the flag is not None
        while not mc.sc or mc.sc.are_we_master is None:
            time.sleep(0.3)

        # Make sure the end configuration is as expected
        assert mc.sc.are_we_master is False
        assert mc.sc.master_url is None
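
Both election tests spin on bare "while not ...: time.sleep(0.3)" loops, which hang forever if the election never completes. A hypothetical guard (not in the source) that fails the test instead of hanging it:

import time
from typing import Callable

def wait_until(predicate: Callable[[], bool], *, timeout: float = 30.0, interval: float = 0.3) -> None:
    # Poll the predicate until it holds, or raise once the deadline passes.
    deadline = time.monotonic() + timeout
    while not predicate():
        if time.monotonic() >= deadline:
            raise TimeoutError("condition not met within timeout")
        time.sleep(interval)

# e.g. wait_until(lambda: is_master(master)) instead of the bare loop above
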
Example #7
async def test_remote_client(registry_async_client):
    schema_avro = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
    reg_cli = SchemaRegistryClient()
    reg_cli.client = registry_async_client
    subject = new_random_name("subject")
    sc_id = await reg_cli.post_new_schema(subject, schema_avro)
    assert sc_id >= 0
    stored_schema = await reg_cli.get_schema_for_id(sc_id)
    assert stored_schema == schema_avro, f"stored schema {stored_schema.to_json()} is not {schema_avro.to_json()}"
    stored_id, stored_schema = await reg_cli.get_latest_schema(subject)
    assert stored_id == sc_id
    assert stored_schema == schema_avro
Example #8
def fixture_registry_async_pair(session_tmpdir: TempDirCreator, kafka_server: Optional[KafkaConfig]):
    assert kafka_server, f"registry_async_pair cannot be used if the env variable `{REGISTRY_URI}` or `{REST_URI}` is set"

    master_config_path = os.path.join(session_tmpdir(), "karapace_config_master.json")
    slave_config_path = os.path.join(session_tmpdir(), "karapace_config_slave.json")
    master_port, slave_port = 1234, 5678
    kafka_port = kafka_server.kafka_port
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")
    write_config(
        master_config_path, {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        }
    )
    write_config(
        slave_config_path, {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        }
    )
    master_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", master_config_path])
    slave_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", slave_config_path])
    try:
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()
Example #9
def fixture_registry_async_pair(session_tmpdir, kafka_server):
    master_config_path = os.path.join(session_tmpdir(), "karapace_config_master.json")
    slave_config_path = os.path.join(session_tmpdir(), "karapace_config_slave.json")
    master_port, slave_port = 1234, 5678
    kafka_port = kafka_server["kafka_port"]
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")
    write_config(
        master_config_path, {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        })
    write_config(
        slave_config_path, {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        })
    master_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", master_config_path])
    slave_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", slave_config_path])
    try:
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()
Example #10
def fixture_registry_async_pair(tmp_path: Path, kafka_servers: KafkaServers):
    master_config_path = tmp_path / "karapace_config_master.json"
    slave_config_path = tmp_path / "karapace_config_slave.json"
    master_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[])
    slave_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[master_port])
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")
    write_config(
        master_config_path, {
            "bootstrap_uri": kafka_servers.bootstrap_servers,
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        })
    write_config(
        slave_config_path, {
            "bootstrap_uri": kafka_servers.bootstrap_servers,
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        })
    master_process = Popen(["python", "-m", "karapace.karapace_all", str(master_config_path)])
    slave_process = Popen(["python", "-m", "karapace.karapace_all", str(slave_config_path)])
    try:
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()
Example #11
async def test_remote_client_protobuf(registry_async_client):
    schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain)
    reg_cli = SchemaRegistryClient()
    reg_cli.client = registry_async_client
    subject = new_random_name("subject")
    sc_id = await reg_cli.post_new_schema(subject, schema_protobuf)
    assert sc_id >= 0
    stored_schema = await reg_cli.get_schema_for_id(sc_id)
    assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}"
    stored_id, stored_schema = await reg_cli.get_latest_schema(subject)
    assert stored_id == sc_id
    assert stored_schema == schema_protobuf
Example #12
async def test_backup_restore(
    registry_async_client: Client,
    kafka_servers: KafkaServers,
    tmp_path: Path,
) -> None:
    subject = new_random_name("subject")
    restore_location = tmp_path / "restore.log"

    with restore_location.open("w") as fp:
        jsonlib.dump(
            [[
                {
                    "subject": subject,
                    "version": 1,
                    "magic": 1,
                    "keytype": "SCHEMA",
                },
                {
                    "deleted": False,
                    "id": 1,
                    "schema": "\"string\"",
                    "subject": subject,
                    "version": 1,
                },
            ]],
            fp=fp,
        )

    config = set_config_defaults({"bootstrap_uri": kafka_servers.bootstrap_servers})
    sb = SchemaBackup(config, str(restore_location))
    sb.restore_backup()

    # The restored karapace should have the previously created subject
    all_subjects = []
    expiration = Expiration.from_timeout(timeout=10)
    while subject not in all_subjects:
        expiration.raise_if_expired(msg=f"{subject} not in {all_subjects}")
        res = await registry_async_client.get("subjects")
        assert res.status_code == 200
        all_subjects = res.json()

    # Test a few exotic scenarios
    subject = new_random_name("subject")
    res = await registry_async_client.put(f"config/{subject}", json={"compatibility": "NONE"})
    assert res.status == 200
    assert res.json()["compatibility"] == "NONE"

    # Restore a compatibility config remove message
    with open(restore_location, "w") as fp:
        fp.write(
            """
[
    [
        {{
            "subject": "{subject_value}",
            "magic": 0,
            "keytype": "CONFIG"
        }},
        null
    ]
]
        """.format(subject_value=subject)
        )
    res = await registry_async_client.get(f"config/{subject}")
    assert res.status == 200
    sb.restore_backup()
    time.sleep(1.0)
    res = await registry_async_client.get(f"config/{subject}")
    assert res.status == 404

    # Restore a complete schema delete message
    subject = new_random_name("subject")
    res = await registry_async_client.put(f"config/{subject}", json={"compatibility": "NONE"})
    res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": '{"type": "int"}'})
    res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": '{"type": "float"}'})
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1, 2]
    with open(restore_location, "w") as fp:
        fp.write(
            """
[
    [
        {{
            "subject": "{subject_value}",
            "magic": 1,
            "keytype": "SCHEMA",
            "version": 2
        }},
        null
    ]
]
        """.format(subject_value=subject)
        )
    sb.restore_backup()
    time.sleep(1.0)
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1]

    # Schema delete for a nonexistent subject version is ignored
    subject = new_random_name("subject")
    res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": '{"type": "string"}'})
    with open(restore_location, "w") as fp:
        fp.write(
            """
[
    [
        {{
            "subject": "{subject_value}",
            "magic": 1,
            "keytype": "SCHEMA",
            "version": 2
        }},
        null
    ]
]
        """.format(subject_value=subject)
        )
    sb.restore_backup()
    time.sleep(1.0)
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1]
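
Expiration above is imported from the test utilities and is not shown on this page; a minimal sketch consistent with the two calls the test makes (from_timeout and raise_if_expired):

import time
from dataclasses import dataclass

@dataclass
class Expiration:
    deadline: float

    @classmethod
    def from_timeout(cls, timeout: float) -> "Expiration":
        return cls(deadline=time.monotonic() + timeout)

    def raise_if_expired(self, msg: str) -> None:
        # Raising keeps the polling loop in the test bounded.
        if time.monotonic() > self.deadline:
            raise TimeoutError(msg)
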
Example #13
async def test_schema_request_forwarding(registry_async_pair):
    master_url, slave_url = registry_async_pair
    max_tries, counter = 5, 0
    wait_time = 0.5
    subject = new_random_name("subject")
    schema = {"type": "string"}
    other_schema = {"type": "int"}
    # Config updates
    for subj_path in [None, subject]:
        if subj_path:
            path = f"config/{subject}"
        else:
            path = "config"
        for compat in ["FULL", "BACKWARD", "FORWARD", "NONE"]:
            resp = requests.put(f"{slave_url}/{path}", json={"compatibility": compat})
            assert resp.ok
            while True:
                if counter >= max_tries:
                    raise Exception("Compat update not propagated")
                resp = requests.get(f"{master_url}/{path}")
                if not resp.ok:
                    print(f"Invalid http status code: {resp.status_code}")
                    counter += 1
                    await asyncio.sleep(wait_time)
                    continue
                data = resp.json()
                if "compatibilityLevel" not in data:
                    print(f"Invalid response: {data}")
                    counter += 1
                    await asyncio.sleep(wait_time)
                    continue
                if data["compatibilityLevel"] != compat:
                    print(f"Bad compatibility: {data}")
                    counter += 1
                    await asyncio.sleep(wait_time)
                    continue
                break

    # New schema updates; the last configured compatibility level is NONE
    for s in [schema, other_schema]:
        resp = requests.post(f"{slave_url}/subjects/{subject}/versions", json={"schema": json.dumps(s)})
    assert resp.ok
    data = resp.json()
    assert "id" in data, data
    counter = 0
    while True:
        if counter >= max_tries:
            raise Exception("Subject schema data not propagated yet")
        resp = requests.get(f"{master_url}/subjects/{subject}/versions")
        if not resp.ok:
            print(f"Invalid http status code: {resp.status_code}")
            counter += 1
            continue
        data = resp.json()
        if not data:
            print(f"No versions registered for subject {subject} yet")
            counter += 1
            continue
        assert len(data) == 2, data
        assert data[0] == 1, data
        print("Subject schema data propagated")
        break

    # Schema deletions
    resp = requests.delete(f"{slave_url}/subjects/{subject}/versions/1")
    assert resp.ok
    counter = 0
    while True:
        if counter >= max_tries:
            raise Exception("Subject version deletion not propagated yet")
        resp = requests.get(f"{master_url}/subjects/{subject}/versions/1")
        if resp.ok:
            print(f"Subject {subject} still has version 1 on master")
            counter += 1
            continue
        assert resp.status_code == 404
        print(f"Subject {subject} no longer has version 1")
        break

    # Subject deletion
    resp = requests.get(f"{master_url}/subjects/")
    assert resp.ok
    data = resp.json()
    assert subject in data
    resp = requests.delete(f"{slave_url}/subjects/{subject}")
    assert resp.ok
    counter = 0
    while True:
        if counter >= max_tries:
            raise Exception("Subject deletion not propagated yet")
        resp = requests.get(f"{master_url}/subjects/")
        if not resp.ok:
            print("Could not retrieve subject list on master")
            counter += 1
            continue
        data = resp.json()
        assert subject not in data
        break
Example #14
async def test_subscription(rest_async_client, admin_client, producer, trail):
    # The random name is necessary to avoid test errors, without it the second
    # parametrize test will fail. Issue: #178
    group_name = new_random_name("group")

    header = REST_HEADERS["binary"]
    topic_name = new_topic(admin_client)
    instance_id = await new_consumer(rest_async_client, group_name, fmt="binary", trail=trail)
    sub_path = f"/consumers/{group_name}/instances/{instance_id}/subscription{trail}"
    consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000"
    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 0, \
        f"Expecting no subscription on freshly created consumer: {data}"
    # simple sub
    res = await rest_async_client.post(sub_path, json={"topics": [topic_name]}, headers=header)
    assert res.ok
    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 1 and data["topics"][0] == topic_name, \
        f"expecting {topic_name} in {data}"
    for _ in range(3):
        producer.send(topic_name, b"foo").get()
    resp = await rest_async_client.get(consume_path, headers=header)
    data = resp.json()
    assert resp.ok, f"Expected a successful response: {data['message']}"
    assert len(data) == 3, f"Expected to consume 3 messages but got {data}"

    # on delete it's empty again
    res = await rest_async_client.delete(sub_path, headers=header)
    assert res.ok
    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 0, f"expecting {data} to be empty"
    # one pattern sub will get all 3
    prefix = f"{hash(random.random())}"
    pattern_topics = [new_topic(admin_client, prefix=f"{prefix}{i}") for i in range(3)]
    res = await rest_async_client.post(sub_path, json={"topic_pattern": f"{prefix}.*"}, headers=REST_HEADERS["json"])
    assert res.ok

    # Consume so confluent rest reevaluates the subscription
    resp = await rest_async_client.get(consume_path, headers=header)
    assert resp.ok
    # Should we keep this behaviour?

    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 3, "expecting subscription to 3 topics by pattern"
    subscribed_to = set(data["topics"])
    expected = set(pattern_topics)
    assert expected == subscribed_to, f"Expecting {expected} as subscribed to topics, but got {subscribed_to} instead"
    # writing to all 3 will get us results from all 3
    for t in pattern_topics:
        for _ in range(3):
            producer.send(t, b"bar").get()
    resp = await rest_async_client.get(consume_path, headers=header)
    data = resp.json()
    assert resp.ok, f"Expected a successful response: {data['message']}"
    assert len(data) == 9, f"Expected to consume 9 messages but got {data}"

    # topic name sub along with pattern will fail
    res = await rest_async_client.post(
        sub_path, json={
            "topics": [topic_name],
            "topic_pattern": "baz"
        }, headers=REST_HEADERS["json"]
    )
    assert res.status == 409, f"Invalid state error expected: {res.status}"
    data = res.json()
    assert data["error_code"] == 40903, f"Invalid state error expected: {data}"
    # assign after subscribe will fail
    assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}"
    assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]}
    res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload)
    assert res.status == 409, "Expecting status code 409 on assign after subscribe on the same consumer instance"
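
The subscription test depends on REST_HEADERS, which this page does not show. A hedged reconstruction, assuming Confluent-style v2 content types (the exact map in the test utilities may differ):

# Hypothetical reconstruction of the header map used above.
REST_HEADERS = {
    "json": {"Content-Type": "application/vnd.kafka.json.v2+json"},
    "binary": {"Content-Type": "application/vnd.kafka.binary.v2+json"},
}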