async def fixture_registry_async(
    tmp_path: Path,
    kafka_server: Optional[KafkaConfig],
) -> AsyncIterator[KarapaceSchemaRegistry]:
    if not kafka_server:
        assert REGISTRY_URI in os.environ or REST_URI in os.environ
        instance, _ = mock_factory("registry")()
        yield instance
    else:
        config_path = tmp_path / "karapace_config.json"
        kafka_port = kafka_server.kafka_port
        config = set_config_defaults({
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": new_random_name(),
            "group_id": new_random_name("schema_registry"),
        })
        write_config(config_path, config)
        # The defaults were already applied to `config` above.
        registry = KarapaceSchemaRegistry(config_file_path=str(config_path), config=config)
        await registry.get_master()
        try:
            yield registry
        finally:
            registry.close()

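# `write_config` is used by the fixtures in this file but not defined here. A
# minimal sketch, assuming it simply serializes the dict as JSON to the given
# path (the name suffix marks it as a hypothetical stand-in, not the actual
# project helper):
def write_config_sketch(config_path, config: dict) -> None:
    import json

    # The fixtures pass both str and pathlib.Path paths, so plain open() is
    # used rather than Path.write_text().
    with open(config_path, "w") as fp:
        json.dump(config, fp, indent=2)
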
def test_master_selection(kafka_servers: KafkaServers, strategy: str) -> None:
    # Use random ports to allow for parallel runs.
    port1 = get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[])
    port2 = get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[port1])
    port_aa, port_bb = sorted((port1, port2))
    client_id_aa = new_random_name("master_selection_aa_")
    client_id_bb = new_random_name("master_selection_bb_")
    group_id = new_random_name("group_id")

    config_aa = set_config_defaults({
        "advertised_hostname": "127.0.0.1",
        "bootstrap_uri": kafka_servers.bootstrap_servers,
        "client_id": client_id_aa,
        "group_id": group_id,
        "port": port_aa,
        "master_election_strategy": strategy,
    })
    config_bb = set_config_defaults({
        "advertised_hostname": "127.0.0.1",
        "bootstrap_uri": kafka_servers.bootstrap_servers,
        "client_id": client_id_bb,
        "group_id": group_id,
        "port": port_bb,
        "master_election_strategy": strategy,
    })

    with closing(init_admin(config_aa)) as mc_aa, closing(init_admin(config_bb)) as mc_bb:
        if strategy == "lowest":
            master = mc_aa
            slave = mc_bb
        else:
            master = mc_bb
            slave = mc_aa

        # Wait for the election to happen
        while not is_master(master):
            time.sleep(0.3)

        while not has_master(slave):
            time.sleep(0.3)

        # Make sure the end configuration is as expected
        master_url = f'http://{master.config["host"]}:{master.config["port"]}'
        assert master.sc.election_strategy == strategy
        assert slave.sc.election_strategy == strategy
        assert master.sc.master_url == master_url
        assert slave.sc.master_url == master_url

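# `is_master` and `has_master` above are not defined in this section. A
# minimal sketch of the predicates the polling loops need, assuming the admin
# object exposes the schema coordinator as `.sc` with `are_we_master` and
# `master_url` attributes (as the assertions above already rely on); these
# are illustrative helpers, not necessarily the project's own implementation:
def is_master(mc) -> bool:
    """True when the election has finished and this node won it."""
    return bool(mc.sc and mc.sc.are_we_master)


def has_master(mc) -> bool:
    """True when the election has finished and this non-master node knows the master."""
    return bool(mc.sc and not mc.sc.are_we_master and mc.sc.master_url is not None)
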
def fixture_registry_async_pair(tmp_path: Path, kafka_server: Optional[KafkaConfig]):
    assert kafka_server, f"registry_async_pair cannot be used if the env variable `{REGISTRY_URI}` or `{REST_URI}` is set"

    master_config_path = tmp_path / "karapace_config_master.json"
    slave_config_path = tmp_path / "karapace_config_slave.json"
    master_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[])
    slave_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[master_port])
    kafka_port = kafka_server.kafka_port
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")

    write_config(
        master_config_path,
        {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        },
    )
    write_config(
        slave_config_path,
        {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        },
    )

    master_process = Popen(["python", "-m", "karapace.karapace_all", str(master_config_path)])
    slave_process = Popen(["python", "-m", "karapace.karapace_all", str(slave_config_path)])
    try:
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()

async def insert_data(c):
    subject = new_random_name("subject")
    res = await c.post(
        f"subjects/{subject}/versions",
        json={"schema": '{"type": "string"}'},
    )
    assert res.status == 200
    assert "id" in res.json()
    return subject

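# `new_random_name` is used throughout these tests to keep subjects, groups,
# and client ids unique across parallel runs. A plausible sketch, assuming it
# appends a random suffix to the given prefix (a hypothetical stand-in, not
# the project's exact helper):
def new_random_name_sketch(prefix: str = "") -> str:
    import secrets

    # An 8-hex-char suffix keeps names readable while making clashes unlikely.
    return f"{prefix}{secrets.token_hex(4)}"
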
async def fixture_registry_async(session_tmpdir, kafka_server):
    if REGISTRY_URI in os.environ or REST_URI in os.environ:
        instance, _ = mock_factory("registry")()
        yield instance
    else:
        config_path = os.path.join(session_tmpdir(), "karapace_config.json")
        kafka_port = kafka_server["kafka_port"]
        write_config(
            config_path,
            {
                "log_level": "WARNING",
                "bootstrap_uri": f"127.0.0.1:{kafka_port}",
                "topic_name": new_random_name(),
                "group_id": new_random_name("schema_registry"),
            },
        )
        registry = KarapaceSchemaRegistry(config_path)
        await registry.get_master()
        try:
            yield registry
        finally:
            registry.close()

def test_no_eligible_master(kafka_servers: KafkaServers) -> None:
    client_id = new_random_name("master_selection_")
    group_id = new_random_name("group_id")

    config_aa = set_config_defaults({
        "advertised_hostname": "127.0.0.1",
        "bootstrap_uri": kafka_servers.bootstrap_servers,
        "client_id": client_id,
        "group_id": group_id,
        "port": get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[]),
        "master_eligibility": False,
    })

    with closing(init_admin(config_aa)) as mc:
        # Wait for the election to happen, i.e. the flag is not None
        while not mc.sc or mc.sc.are_we_master is None:
            time.sleep(0.3)

        # Make sure the end configuration is as expected
        assert mc.sc.are_we_master is False
        assert mc.sc.master_url is None

async def test_remote_client(registry_async_client):
    schema_avro = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
    reg_cli = SchemaRegistryClient()
    reg_cli.client = registry_async_client
    subject = new_random_name("subject")
    sc_id = await reg_cli.post_new_schema(subject, schema_avro)
    assert sc_id >= 0
    stored_schema = await reg_cli.get_schema_for_id(sc_id)
    assert stored_schema == schema_avro, f"stored schema {stored_schema.to_json()} is not {schema_avro.to_json()}"
    stored_id, stored_schema = await reg_cli.get_latest_schema(subject)
    assert stored_id == sc_id
    assert stored_schema == schema_avro

def fixture_registry_async_pair(session_tmpdir: TempDirCreator, kafka_server: Optional[KafkaConfig]):
    assert kafka_server, f"registry_async_pair cannot be used if the env variable `{REGISTRY_URI}` or `{REST_URI}` is set"

    master_config_path = os.path.join(session_tmpdir(), "karapace_config_master.json")
    slave_config_path = os.path.join(session_tmpdir(), "karapace_config_slave.json")
    master_port, slave_port = 1234, 5678
    kafka_port = kafka_server.kafka_port
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")

    write_config(
        master_config_path,
        {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        },
    )
    write_config(
        slave_config_path,
        {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        },
    )

    master_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", master_config_path])
    slave_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", slave_config_path])
    try:
        # Wait inside the try block so the processes are killed even if the
        # ports never open.
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()

def fixture_registry_async_pair(session_tmpdir, kafka_server):
    master_config_path = os.path.join(session_tmpdir(), "karapace_config_master.json")
    slave_config_path = os.path.join(session_tmpdir(), "karapace_config_slave.json")
    master_port, slave_port = 1234, 5678
    kafka_port = kafka_server["kafka_port"]
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")

    write_config(
        master_config_path,
        {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        },
    )
    write_config(
        slave_config_path,
        {
            "log_level": "WARNING",
            "bootstrap_uri": f"127.0.0.1:{kafka_port}",
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        },
    )

    master_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", master_config_path])
    slave_process = subprocess.Popen(["python", "-m", "karapace.karapace_all", slave_config_path])
    try:
        # Wait inside the try block so the processes are killed even if the
        # ports never open.
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()

def fixture_registry_async_pair(tmp_path: Path, kafka_servers: KafkaServers):
    master_config_path = tmp_path / "karapace_config_master.json"
    slave_config_path = tmp_path / "karapace_config_slave.json"
    master_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[])
    slave_port = get_random_port(port_range=REGISTRY_PORT_RANGE, blacklist=[master_port])
    topic_name = new_random_name("schema_pairs")
    group_id = new_random_name("schema_pairs")

    write_config(
        master_config_path,
        {
            "bootstrap_uri": kafka_servers.bootstrap_servers,
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": master_port,
        },
    )
    write_config(
        slave_config_path,
        {
            "bootstrap_uri": kafka_servers.bootstrap_servers,
            "topic_name": topic_name,
            "group_id": group_id,
            "advertised_hostname": "127.0.0.1",
            "karapace_registry": True,
            "port": slave_port,
        },
    )

    master_process = Popen(["python", "-m", "karapace.karapace_all", str(master_config_path)])
    slave_process = Popen(["python", "-m", "karapace.karapace_all", str(slave_config_path)])
    try:
        wait_for_port(master_port)
        wait_for_port(slave_port)
        yield f"http://127.0.0.1:{master_port}", f"http://127.0.0.1:{slave_port}"
    finally:
        master_process.kill()
        slave_process.kill()

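# `wait_for_port` gates the pair fixtures on the subprocesses actually binding
# their HTTP ports. A minimal sketch, assuming it simply retries a TCP connect
# until the port accepts connections or a deadline passes (a hypothetical
# helper; the hostname and timeout parameters are illustrative):
def wait_for_port_sketch(port: int, *, hostname: str = "127.0.0.1", timeout: float = 30.0) -> None:
    import socket
    import time

    start = time.monotonic()
    while True:
        try:
            with socket.create_connection((hostname, port), timeout=1.0):
                return  # The port is accepting connections.
        except OSError:
            if time.monotonic() - start > timeout:
                raise TimeoutError(f"Port {port} on {hostname} did not open within {timeout}s")
            time.sleep(0.1)
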
async def test_remote_client_protobuf(registry_async_client):
    schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain)
    reg_cli = SchemaRegistryClient()
    reg_cli.client = registry_async_client
    subject = new_random_name("subject")
    sc_id = await reg_cli.post_new_schema(subject, schema_protobuf)
    assert sc_id >= 0
    stored_schema = await reg_cli.get_schema_for_id(sc_id)
    assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}"
    stored_id, stored_schema = await reg_cli.get_latest_schema(subject)
    assert stored_id == sc_id
    assert stored_schema == schema_protobuf

async def test_backup_restore(
    registry_async_client: Client,
    kafka_servers: KafkaServers,
    tmp_path: Path,
) -> None:
    subject = new_random_name("subject")
    restore_location = tmp_path / "restore.log"

    with restore_location.open("w") as fp:
        jsonlib.dump(
            [[
                {
                    "subject": subject,
                    "version": 1,
                    "magic": 1,
                    "keytype": "SCHEMA",
                },
                {
                    "deleted": False,
                    "id": 1,
                    "schema": "\"string\"",
                    "subject": subject,
                    "version": 1,
                },
            ]],
            fp=fp,
        )

    config = set_config_defaults({"bootstrap_uri": kafka_servers.bootstrap_servers})
    sb = SchemaBackup(config, str(restore_location))
    sb.restore_backup()

    # The restored karapace should have the previously created subject
    all_subjects = []
    expiration = Expiration.from_timeout(timeout=10)
    while subject not in all_subjects:
        expiration.raise_if_expired(msg=f"{subject} not in {all_subjects}")
        res = await registry_async_client.get("subjects")
        assert res.status_code == 200
        all_subjects = res.json()

    # Test a few exotic scenarios
    subject = new_random_name("subject")
    res = await registry_async_client.put(f"config/{subject}", json={"compatibility": "NONE"})
    assert res.status == 200
    assert res.json()["compatibility"] == "NONE"

    # Restore a compatibility config remove message
    with open(restore_location, "w") as fp:
        fp.write(
            """
            [
                [
                    {{
                        "subject": "{subject_value}",
                        "magic": 0,
                        "keytype": "CONFIG"
                    }},
                    null
                ]
            ]
            """.format(subject_value=subject)
        )
    res = await registry_async_client.get(f"config/{subject}")
    assert res.status == 200
    sb.restore_backup()
    time.sleep(1.0)
    res = await registry_async_client.get(f"config/{subject}")
    assert res.status == 404

    # Restore a complete schema delete message
    subject = new_random_name("subject")
    res = await registry_async_client.put(f"config/{subject}", json={"compatibility": "NONE"})
    res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": '{"type": "int"}'})
    res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": '{"type": "float"}'})
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1, 2]
    with open(restore_location, "w") as fp:
        fp.write(
            """
            [
                [
                    {{
                        "subject": "{subject_value}",
                        "magic": 1,
                        "keytype": "SCHEMA",
                        "version": 2
                    }},
                    null
                ]
            ]
            """.format(subject_value=subject)
        )
    sb.restore_backup()
    time.sleep(1.0)
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1]

    # Schema delete for a nonexistent subject version is ignored
    subject = new_random_name("subject")
    res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": '{"type": "string"}'})
    with open(restore_location, "w") as fp:
        fp.write(
            """
            [
                [
                    {{
                        "subject": "{subject_value}",
                        "magic": 1,
                        "keytype": "SCHEMA",
                        "version": 2
                    }},
                    null
                ]
            ]
            """.format(subject_value=subject)
        )
    sb.restore_backup()
    time.sleep(1.0)
    res = await registry_async_client.get(f"subjects/{subject}/versions")
    assert res.status == 200
    assert res.json() == [1]

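# `Expiration` above wraps deadline handling for the polling loop. A minimal
# sketch consistent with the `from_timeout` / `raise_if_expired` calls in the
# test (an assumed shape, not necessarily the real implementation):
import time
from dataclasses import dataclass


@dataclass
class ExpirationSketch:
    deadline: float

    @classmethod
    def from_timeout(cls, timeout: float) -> "ExpirationSketch":
        # Use a monotonic clock so wall-clock adjustments cannot skew the deadline.
        return cls(deadline=time.monotonic() + timeout)

    def raise_if_expired(self, msg: str) -> None:
        if time.monotonic() > self.deadline:
            raise TimeoutError(msg)
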
async def test_schema_request_forwarding(registry_async_pair):
    master_url, slave_url = registry_async_pair
    max_tries = 5
    wait_time = 0.5
    subject = new_random_name("subject")
    schema = {"type": "string"}
    other_schema = {"type": "int"}

    # Config updates
    for subj_path in [None, subject]:
        if subj_path:
            path = f"config/{subject}"
        else:
            path = "config"
        for compat in ["FULL", "BACKWARD", "FORWARD", "NONE"]:
            resp = requests.put(f"{slave_url}/{path}", json={"compatibility": compat})
            assert resp.ok
            # Poll with a small delay so the forwarded write has time to propagate.
            counter = 0
            while True:
                if counter >= max_tries:
                    raise Exception("Compat update not propagated")
                resp = requests.get(f"{master_url}/{path}")
                if not resp.ok:
                    print(f"Invalid http status code: {resp.status_code}")
                    counter += 1
                    await asyncio.sleep(wait_time)
                    continue
                data = resp.json()
                if "compatibilityLevel" not in data:
                    print(f"Invalid response: {data}")
                    counter += 1
                    await asyncio.sleep(wait_time)
                    continue
                if data["compatibilityLevel"] != compat:
                    print(f"Bad compatibility: {data}")
                    counter += 1
                    await asyncio.sleep(wait_time)
                    continue
                break

    # New schema updates, last compatibility is None
    for s in [schema, other_schema]:
        resp = requests.post(f"{slave_url}/subjects/{subject}/versions", json={"schema": json.dumps(s)})
        assert resp.ok
        data = resp.json()
        assert "id" in data, data
    counter = 0
    while True:
        if counter >= max_tries:
            raise Exception("Subject schema data not propagated yet")
        resp = requests.get(f"{master_url}/subjects/{subject}/versions")
        if not resp.ok:
            print(f"Invalid http status code: {resp.status_code}")
            counter += 1
            await asyncio.sleep(wait_time)
            continue
        data = resp.json()
        if not data:
            print(f"No versions registered for subject {subject} yet")
            counter += 1
            await asyncio.sleep(wait_time)
            continue
        assert len(data) == 2, data
        assert data[0] == 1, data
        print("Subject schema data propagated")
        break

    # Schema deletions
    resp = requests.delete(f"{slave_url}/subjects/{subject}/versions/1")
    assert resp.ok
    counter = 0
    while True:
        if counter >= max_tries:
            raise Exception("Subject version deletion not propagated yet")
        resp = requests.get(f"{master_url}/subjects/{subject}/versions/1")
        if resp.ok:
            print(f"Subject {subject} still has version 1 on master")
            counter += 1
            await asyncio.sleep(wait_time)
            continue
        assert resp.status_code == 404
        print(f"Subject {subject} no longer has version 1")
        break

    # Subject deletion
    resp = requests.get(f"{master_url}/subjects/")
    assert resp.ok
    data = resp.json()
    assert subject in data
    resp = requests.delete(f"{slave_url}/subjects/{subject}")
    assert resp.ok
    counter = 0
    while True:
        if counter >= max_tries:
            raise Exception("Subject deletion not propagated yet")
        resp = requests.get(f"{master_url}/subjects/")
        if not resp.ok:
            print("Could not retrieve subject list on master")
            counter += 1
            await asyncio.sleep(wait_time)
            continue
        data = resp.json()
        assert subject not in data
        break

async def test_subscription(rest_async_client, admin_client, producer, trail):
    # The random name is necessary to avoid test errors; without it the second
    # parametrized test will fail. Issue: #178
    group_name = new_random_name("group")
    header = REST_HEADERS["binary"]
    topic_name = new_topic(admin_client)
    instance_id = await new_consumer(rest_async_client, group_name, fmt="binary", trail=trail)
    sub_path = f"/consumers/{group_name}/instances/{instance_id}/subscription{trail}"
    consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000"

    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 0, \
        f"Expecting no subscription on freshly created consumer: {data}"

    # Simple subscription by topic name
    res = await rest_async_client.post(sub_path, json={"topics": [topic_name]}, headers=header)
    assert res.ok
    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 1 and data["topics"][0] == topic_name, \
        f"expecting {topic_name} in {data}"

    for _ in range(3):
        producer.send(topic_name, b"foo").get()
    resp = await rest_async_client.get(consume_path, headers=header)
    data = resp.json()
    assert resp.ok, f"Expected a successful response: {data['message']}"
    assert len(data) == 3, f"Expected to consume 3 messages but got {data}"

    # On delete the subscription is empty again
    res = await rest_async_client.delete(sub_path, headers=header)
    assert res.ok
    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 0, f"expecting {data} to be empty"

    # One pattern subscription will match all 3 prefixed topics
    prefix = f"{hash(random.random())}"
    pattern_topics = [new_topic(admin_client, prefix=f"{prefix}{i}") for i in range(3)]
    res = await rest_async_client.post(sub_path, json={"topic_pattern": f"{prefix}.*"}, headers=REST_HEADERS["json"])
    assert res.ok

    # Consume so confluent rest reevaluates the subscription
    resp = await rest_async_client.get(consume_path, headers=header)
    assert resp.ok
    # Should we keep this behaviour?
    res = await rest_async_client.get(sub_path, headers=header)
    assert res.ok
    data = res.json()
    assert "topics" in data and len(data["topics"]) == 3, "expecting subscription to 3 topics by pattern"
    subscribed_to = set(data["topics"])
    expected = set(pattern_topics)
    assert expected == subscribed_to, f"Expecting {expected} as subscribed to topics, but got {subscribed_to} instead"

    # Writing to all 3 topics will get us results from all 3
    for t in pattern_topics:
        for _ in range(3):
            producer.send(t, b"bar").get()
    resp = await rest_async_client.get(consume_path, headers=header)
    data = resp.json()
    assert resp.ok, f"Expected a successful response: {data['message']}"
    assert len(data) == 9, f"Expected to consume 9 messages but got {data}"

    # Subscribing by topic name along with a pattern will fail
    res = await rest_async_client.post(
        sub_path,
        json={
            "topics": [topic_name],
            "topic_pattern": "baz",
        },
        headers=REST_HEADERS["json"],
    )
    assert res.status == 409, f"Invalid state error expected: {res.status}"
    data = res.json()
    assert data["error_code"] == 40903, f"Invalid state error expected: {data}"

    # Assign after subscribe will fail
    assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}"
    assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]}
    res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload)
    assert res.status == 409, "Expecting status code 409 on assign after subscribe on the same consumer instance"