def test_avro_serializer_multiple_topic_per_serializer_instance(mock_schema_registry):
    """
    Ensures the schema_id is correctly resolved when the same serializer
    instance is used for multiple topics.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic1 = "test-topic1"
    topic2 = "test-topic2"

    test_serializer = AvroSerializer("string", test_client,
                                     conf={'auto.register.schemas': False})

    def ensure_id_match(ctx):
        subject = "{}-{}".format(ctx.topic, ctx.field)
        expected_id = find_schema_id(subject)

        payload = test_serializer("test", ctx)
        _, schema_id = unpack('>bI', BytesIO(payload).read(5))
        assert schema_id == expected_id

    ensure_id_match(SerializationContext(topic1, MessageField.KEY))
    ensure_id_match(SerializationContext(topic2, MessageField.VALUE))
    ensure_id_match(SerializationContext(topic1, MessageField.KEY))

    # Ensure lookup_schema was invoked only once per schema
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1
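# For reference, the 5 bytes unpacked in ensure_id_match above are the Confluent
# wire-format header: a magic byte (0) followed by the 4-byte big-endian schema ID.
# A minimal helper illustrating the same parsing (the function name is illustrative,
# not part of the test suite):
def parse_confluent_header(payload):
    magic, schema_id = unpack('>bI', BytesIO(payload).read(5))
    return magic, schema_id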
def test_avro_serializer_preload_schema_id(mock_schema_registry):
    """
    Ensures the serializer does not reload the schema ID from the registry
    after the user has forced its preloading.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic1 = "test-topic1"
    topic2 = "test-topic2"

    test_serializer = AvroSerializer("string", test_client,
                                     conf={'auto.register.schemas': False})

    test_serializer.load_registry_schema_id(
        SerializationContext(topic1, MessageField.KEY))
    test_serializer.load_registry_schema_id(
        SerializationContext(topic2, MessageField.VALUE))

    # Ensure lookup_schema was invoked only once per schema
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1

    test_serializer("test", SerializationContext(topic1, MessageField.KEY))
    test_serializer("test", SerializationContext(topic2, MessageField.VALUE))

    # Ensure we did not query the registry again
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1
def test_avro_serializer_config_use_latest_version(mock_schema_registry):
    """
    Ensures use.latest.version=True fetches the latest registered schema
    version instead of registering a new one.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic = "test-use-latest-version"
    subject = topic + '-key'

    test_serializer = AvroSerializer(test_client, 'string',
                                     conf={'auto.register.schemas': False,
                                           'use.latest.version': True})

    test_serializer("test", SerializationContext(topic, MessageField.KEY))

    register_count = test_client.counter['POST'].get(
        '/subjects/{}/versions'.format(subject), 0)
    assert register_count == 0
    # Ensure the latest version was requested
    assert test_client.counter['GET'].get(
        '/subjects/{}/versions/latest'.format(subject)) == 1
def assert_cb(err, msg):
    actual = value_deserializer(msg.value(),
                                SerializationContext(topic, MessageField.VALUE,
                                                     msg.headers()))

    if record_type == "record":
        # compare field by field; a bare list comprehension would always be truthy
        assert all(v == actual[k] for k, v in data.items())
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
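# assert_cb above is a delivery callback; a minimal sketch of wiring it up, assuming
# `producer`, `value_serializer`, `topic`, and `data` exist in the enclosing test:
producer.produce(topic,
                 value=value_serializer(data,
                                        SerializationContext(topic, MessageField.VALUE)),
                 on_delivery=assert_cb)
producer.flush()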
def test_avro_serializer_subject_name_strategy_default(load_avsc):
    """
    Ensures the default subject name strategy (topic name) returns the
    correct subject name
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client,
                                     load_avsc('basic_schema.avsc'))

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-value'
def test_avro_serializer_topic_record_subject_name_strategy_primitive(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject name
    for a primitive schema. Also verifies transformation from Avro canonical form.
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(
        test_client, 'int',
        conf={'subject.name.strategy': topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-int'
def test_avro_serializer_topic_record_subject_name_strategy(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject name
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(
        test_client, load_avsc('basic_schema.avsc'),
        conf={'subject.name.strategy': topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-python.test.basic'
def process_row(serialized_data):
    schema = '''
    {
        "namespace": "org.mddarr.rides.event.dto",
        "type": "record",
        "name": "AvroRideCoordinate",
        "fields": [
            {"name": "dataID", "type": "string"},
            {"name": "value", "type": "double"}
        ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient({"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
    # The context takes the message field being deserialized, not the schema
    serializationContext = SerializationContext("time-series", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    return str(deserialized_row['value'])
def send_msg(self, message, partition=0, topic=None):
    # if no topic is provided, the first topic in the list is used as default
    if topic is None:
        out_topic = self.out_topic[0]
    else:
        out_topic = topic

    # encode the data with the specified Avro out_schema
    ctx = SerializationContext(out_topic, MessageField.VALUE)
    ser_message = self.serializer[out_topic](message, ctx)

    try:
        self.producer.produce(topic=out_topic, value=ser_message,
                              partition=partition)
    except Exception as e:
        print(f"Error sending data to Kafka: {repr(e)}")
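# A minimal usage sketch for send_msg, assuming `client` is an instance of the
# enclosing class with out_topic and serializer already configured, and that the
# Avro schema has dataID/value fields as in process_row above:
client.send_msg({"dataID": "sensor-1", "value": 42.0})                 # default topic
client.send_msg({"dataID": "sensor-2", "value": 7.5}, topic="rides")   # explicit topic
client.producer.flush()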
def process_row(serialized_data):
    schema = '''
    {
        "namespace": "org.mddarr.rides.event.dto",
        "type": "record",
        "name": "AvroRideCoordinate",
        "fields": [
            {"name": "eventime", "type": "long"},
            {"name": "latitude", "type": "double"},
            {"name": "longitude", "type": "double"}
        ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient({"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
    # The context takes the message field being deserialized, not the schema
    serializationContext = SerializationContext("coordinates", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    print("The deserialized row looks like " + str(deserialized_row))
    return [deserialized_row['latitude'], deserialized_row['longitude'],
            float(deserialized_row['eventime'])]
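# A minimal sketch of feeding consumed messages through process_row, assuming a
# plain confluent_kafka Consumer subscribed to the "coordinates" topic; the broker
# address and group id are placeholders:
from confluent_kafka import Consumer

consumer = Consumer({"bootstrap.servers": "localhost:9092",
                     "group.id": "coordinate-readers",
                     "auto.offset.reset": "earliest"})
consumer.subscribe(["coordinates"])
msg = consumer.poll(10.0)
if msg is not None and msg.error() is None:
    latitude, longitude, eventime = process_row(msg.value())
consumer.close()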
def test_avro_serializer_config_auto_register_schemas_false(mock_schema_registry):
    """
    Ensures auto.register.schemas=False does not register schema
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic = "test-auto-register"
    subject = topic + '-key'

    test_serializer = AvroSerializer(test_client, 'string',
                                     conf={'auto.register.schemas': False})

    test_serializer("test", SerializationContext(topic, MessageField.KEY))

    register_count = test_client.counter['POST'].get(
        '/subjects/{}/versions'.format(subject), 0)
    assert register_count == 0
    # Ensure lookup_schema was invoked instead
    assert test_client.counter['POST'].get('/subjects/{}'.format(subject)) == 1
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField

# registry credentials and host are redacted; `schema` and `message` come from
# the surrounding code
schemaRegistryClient = SchemaRegistryClient({"url": "http://*****:*****@"})
avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
serializationContext = SerializationContext("time-series", MessageField.VALUE)
deserialized_message = avroDeserializer(message, serializationContext)
"type": "string" }, "position": { "type": "integer" }, }, } schema_client = SchemaRegistryClient({"url": s.schema_server}) json_serializer = JSONSerializer(dumps(message_schema), schema_client, conf={"auto.register.schemas": False}) string_serializer = StringSerializer() key_context = SerializationContext(s.registrations_topic, MessageField.KEY) value_context = SerializationContext(s.registrations_topic, MessageField.VALUE) con = psycopg2.connect( database=s.db_database, user=s.db_user, password=s.db_password, host=s.db_server, port=s.db_port, ) def acked(err, msg): """ Kafka production error callback. Used to raise HTTP 500 when message production fails. :param err: Kakfa error
import time

from confluent_kafka import Producer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField

print("start 1p_multiples")

broker = 'kafka:9093'
topic = 'multiples'
conf = {'bootstrap.servers': broker}

p = Producer(**conf)
s = StringSerializer()
print("created KafkaPC")

ctx = SerializationContext(topic, MessageField.VALUE)

for i in range(10):
    # casts int to string for StringSerializer/StringDeserializer
    message = s(str(i*i), ctx)
    # DeprecationWarning will be resolved in an upcoming release
    # https://github.com/confluentinc/confluent-kafka-python/issues/763
    p.produce(topic, message)
    print(f"Sent message {i*i}")
    time.sleep(1)
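# A minimal counterpart sketch that reads the squares back with a StringDeserializer;
# the consumer group id is an assumption:
from confluent_kafka import Consumer
from confluent_kafka.serialization import StringDeserializer

c = Consumer({'bootstrap.servers': broker,
              'group.id': 'multiples-reader',
              'auto.offset.reset': 'earliest'})
c.subscribe([topic])
d = StringDeserializer()
msg = c.poll(10.0)
if msg is not None and msg.error() is None:
    print(f"Received message {d(msg.value(), ctx)}")
c.close()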
def export_items(self, items):
    items_grouped_by_type = group_by_item_type(items)

    for item_type, topic in self.item_type_to_topic_mapping.items():
        item_group = items_grouped_by_type.get(item_type)

        if item_group:
            serialization_context = SerializationContext(topic, MessageField.VALUE)

            for item in item_group:
                headers = []

                if item["type"] == "block":
                    # Configure header & key
                    key = bytes(str(item["number"]), "utf-8")
                    headers.append(("hash", bytes(item["hash"], "utf-8")))
                    timestamp = int(item['timestamp'])

                    # Create blocks_raw object
                    value_object = blocks_raw.BlockRaw(
                        type=str(item["type"]),
                        number=item["number"],
                        hash=str(item["hash"]),
                        parent_hash=str(item["parent_hash"]),
                        merkle_root_hash=str(item["merkle_root_hash"]),
                        timestamp=item["timestamp"],
                        version=str(item["version"]),
                        transaction_count=item["transaction_count"],
                        peer_id=str(item["peer_id"]),
                        signature=str(item["signature"]),
                        next_leader=str(item["next_leader"]),
                        item_id=str(item["item_id"]),
                        item_timestamp="",
                    )

                elif item["type"] == "log":
                    # Configure header & key
                    key = bytes(str(item["transaction_hash"]), "utf-8")
                    headers.append(("hash", bytes(item["transaction_hash"], "utf-8")))
                    timestamp = int(item['block_timestamp'])
                    headers.append(("address", bytes(item["address"], "utf-8")))

                    # Create logs_raw object
                    value_object = logs_raw.LogRaw(
                        type=str(item["type"]),
                        log_index=item["log_index"],
                        max_log_index=item["max_log_index"],
                        transaction_hash=str(item["transaction_hash"]),
                        transaction_index=item["transaction_index"],
                        address=str(item["address"]),
                        data=dumps(item["data"]),
                        indexed=dumps(item["indexed"]),
                        block_number=item["block_number"],
                        block_timestamp=item["block_timestamp"],
                        block_hash=str(item["block_hash"]),
                        item_id=str(item["item_id"]),
                        item_timestamp="",
                    )

                else:
                    # Configure header & key
                    headers.append(("hash", bytes(item["hash"], "utf-8")))
                    timestamp = int(item['block_timestamp'])

                    if item["to_address"]:
                        headers.append(("to", bytes(item["to_address"], "utf-8")))
                        key = bytes(item["hash"], "utf-8")
                    else:
                        headers.append(("to", bytes("None", "utf-8")))
                        key = bytes(item["hash"], "utf-8")

                    if item["from_address"]:
                        headers.append(("from", bytes(item["from_address"], "utf-8")))
                    else:
                        headers.append(("from", bytes("None", "utf-8")))

                    # Create transactions_raw object
                    value_object = transactions_raw.TransactionRaw(
                        type=str(item["type"]),
                        version=str(item["version"]),
                        from_address=str(item["from_address"]),
                        to_address=str(item["to_address"]),
                        value=dec_to_hex(item["value"]),
                        step_limit=item["step_limit"],
                        timestamp=str(item["timestamp"]),
                        block_timestamp=item["block_timestamp"],
                        nid=item["nid"],
                        nonce=dec_to_hex(item["nonce"]),
                        hash=str(item["hash"]),
                        transaction_index=item["transaction_index"],
                        block_hash=str(item["block_hash"]),
                        block_number=item["block_number"],
                        fee=item["fee"],
                        signature=str(item["signature"]),
                        data_type=str(item["data_type"]),
                        data=dumps(item["data"]),
                        receipt_cumulative_step_used=item["receipt_cumulative_step_used"],
                        receipt_step_used=item["receipt_step_used"],
                        receipt_step_price=item["receipt_step_price"],
                        receipt_score_address=str(item["receipt_score_address"]),
                        receipt_logs=str(item["receipt_logs"]),
                        receipt_status=item["receipt_status"],
                        item_id=str(item["item_id"]),
                        item_timestamp=str(item["item_timestamp"]),
                    )

                if self.serializers:
                    self.producer.produce(
                        topic=topic,
                        value=self.serializers[item_type](value_object,
                                                          serialization_context),
                        key=key,
                        headers=headers,
                        timestamp=timestamp,
                    )
                else:
                    self.producer.produce(
                        topic,
                        value=MessageToJson(value_object),
                        key=key,
                        headers=headers,
                        timestamp=timestamp,
                    )

                self.producer.poll(0)
    self.producer.flush()