def plain_avro_producer(running_cluster_config: Dict[str, str],
                        topic_and_partitions: Tuple[str, int],
                        records) -> SerializingProducer:
    """
    Creates a `confluent_kafka.SerializingProducer` configured with Avro
    key/value serializers that can be used to publish messages.
    """
    topic_id, _ = topic_and_partitions
    key, value = records[0]

    schema_registry_client = SchemaRegistryClient(
        {"url": running_cluster_config["schema-registry"]})
    key_schema = to_key_schema(key)
    avro_key_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client, schema_str=key_schema)
    value_schema = to_value_schema(value)
    avro_value_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client, schema_str=value_schema)

    producer_config = {
        "bootstrap.servers": running_cluster_config["broker"],
        "key.serializer": avro_key_serializer,
        "value.serializer": avro_value_serializer,
    }
    producer = SerializingProducer(producer_config)
    # Pre-bind the topic so callers only have to supply key/value.
    producer.produce = partial(producer.produce, topic=topic_id)
    return producer
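
# Hedged usage sketch for the fixture above, assuming `records` is a list of
# (key, value) tuples; the broker/registry addresses and record contents are
# placeholders, not values from the original snippet.
records = [({"id": 1}, {"payload": "hello"}), ({"id": 2}, {"payload": "world"})]
producer = plain_avro_producer(
    {"broker": "localhost:9092", "schema-registry": "http://localhost:8081"},
    ("test-topic", 1),
    records,
)
for key, value in records:
    # The topic is already bound via functools.partial.
    producer.produce(key=key, value=value)
producer.flush()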
def test_avro_serializer_subject_name_strategy_default(load_avsc):
    """
    Ensures the default (topic name) subject name strategy returns the
    correct subject name.
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client,
                                     load_avsc('basic_schema.avsc'))

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-value'
def __init__(self, config: KafkaEmitterConfig):
    self.config = config
    schema_registry_conf = {
        "url": self.config.connection.schema_registry_url,
        **self.config.connection.schema_registry_config,
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    def convert_mce_to_dict(mce: MetadataChangeEvent,
                            ctx: SerializationContext) -> dict:
        return mce.to_obj(tuples=True)

    mce_avro_serializer = AvroSerializer(
        schema_str=getMetadataChangeEventSchema(),
        schema_registry_client=schema_registry_client,
        to_dict=convert_mce_to_dict,
    )

    def convert_mcp_to_dict(
        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
        ctx: SerializationContext,
    ) -> dict:
        return mcp.to_obj(tuples=True)

    mcp_avro_serializer = AvroSerializer(
        schema_str=getMetadataChangeProposalSchema(),
        schema_registry_client=schema_registry_client,
        to_dict=convert_mcp_to_dict,
    )

    # We maintain a map of producers, one per kind of event.
    producers_config = {
        MCE_KEY: {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": mce_avro_serializer,
            **self.config.connection.producer_config,
        },
        MCP_KEY: {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": mcp_avro_serializer,
            **self.config.connection.producer_config,
        },
    }
    self.producers = {
        key: SerializingProducer(value)
        for (key, value) in producers_config.items()
    }
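
# Hedged sketch of how the producer map above might be used to emit an MCE.
# The `emit_mce_async` name, the `topic_routes` config field, and the URN key
# choice are assumptions for illustration, not the project's confirmed API.
def emit_mce_async(self, mce: MetadataChangeEvent, callback) -> None:
    self.producers[MCE_KEY].produce(
        topic=self.config.topic_routes[MCE_KEY],  # assumed topic lookup
        key=str(mce.proposedSnapshot.urn),        # assumed key choice
        value=mce,
        on_delivery=callback,
    )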
def test_avro_serializer_topic_record_subject_name_strategy_primitive(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject
    name for a primitive schema. Also verifies transformation from Avro
    canonical form.
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(
        test_client, 'int',
        conf={'subject.name.strategy': topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-int'
def test_avro_serializer_topic_record_subject_name_strategy(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject
    name.
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(
        test_client, load_avsc('basic_schema.avsc'),
        conf={'subject.name.strategy': topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-python.test.basic'
def __init__(self, config: KafkaSinkConfig, ctx):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()

    schema_registry_conf = {
        'url': self.config.connection.schema_registry_url,
        **self.config.connection.schema_registry_config,
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
        return mce.to_obj(tuples=True)

    avro_serializer = AvroSerializer(SCHEMA_JSON_STR,
                                     schema_registry_client,
                                     to_dict=convert_mce_to_dict)

    producer_config = {
        "bootstrap.servers": self.config.connection.bootstrap,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer,
        **self.config.connection.producer_config,
    }
    self.producer = SerializingProducer(producer_config)
def create_serializer(self):
    self.serializer = {}
    if self.out_topic is not None:
        for topic in self.out_topic:
            schema_str = self.out_schema[topic].schema_str
            self.serializer[topic] = AvroSerializer(schema_str,
                                                    self.schema_registry)
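
# Hedged companion sketch: using one of the per-topic serializers created
# above, inside the same class. SerializationContext and MessageField are the
# real confluent_kafka serialization types; the `serialize` method name and
# `record` argument are assumptions.
from confluent_kafka.serialization import MessageField, SerializationContext

def serialize(self, topic, record):
    return self.serializer[topic](
        record, SerializationContext(topic, MessageField.VALUE))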
def __init__(self, producer_name, value_schema, groupID='KafkaAvroProducer'):
    # Producer name for logging purposes
    self.logging_prefix = '[' + producer_name + '][KafkaAvroProducer]'
    # Schema Registry configuration
    self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
    # Schema Registry client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
    # String serializer for the key
    self.key_serializer = StringSerializer('utf_8')
    # Avro serializer for the value
    self.value_serializer = AvroSerializer(value_schema,
                                           self.schema_registry_client)
    # Get the producer configuration
    self.producer_conf = EventBackboneConfig.getProducerConfiguration(
        groupID, self.key_serializer, self.value_serializer)
    EventBackboneConfig.printProducerConfiguration(
        self.logging_prefix, self.producer_conf,
        self.schema_registry_conf['url'])
    # Create the producer
    self.producer = SerializingProducer(self.producer_conf)
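
# Hedged companion sketch: a publish helper for the class above. The method
# name, delivery callback, and flush-per-message behavior are assumptions
# added for illustration; only the produce/flush calls are the real
# confluent_kafka SerializingProducer API.
def publishEvent(self, topicName, key, value):
    def delivery_report(err, msg):
        # Called once per message to report delivery success or failure.
        if err is not None:
            print(self.logging_prefix + ' - delivery failed: {}'.format(err))
        else:
            print(self.logging_prefix + ' - delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    self.producer.produce(topic=topicName, key=key, value=value,
                          on_delivery=delivery_report)
    self.producer.flush()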
def create_producer(self, registry_client):
    """
    Creates a SerializingProducer object to produce to a Kafka topic.

    :param registry_client: SchemaRegistryClient, obtained from register_client()
    :return: SerializingProducer object based on config values
    """
    metadata_schema = None
    if self.metadata_type == "COLLECTION":
        metadata_schema = registry_client.get_latest_version(
            self.collection_topic + '-value').schema.schema_str
    if self.metadata_type == "GRANULE":
        metadata_schema = registry_client.get_latest_version(
            self.granule_topic + '-value').schema.schema_str

    metadata_serializer = AvroSerializer(metadata_schema, registry_client)
    producer_conf = {'bootstrap.servers': self.brokers}

    if self.security:
        producer_conf['security.protocol'] = 'SSL'
        producer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
        producer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
        producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']

    # Copy the base config so the shared dict is not mutated.
    meta_producer_conf = producer_conf.copy()
    meta_producer_conf['value.serializer'] = metadata_serializer

    metadata_producer = SerializingProducer(meta_producer_conf)
    return metadata_producer
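
# Hedged usage sketch for create_producer above, inside the same class; the
# `publish_collection` name, the placeholder record, and the delivery lambda
# are assumptions for illustration.
def publish_collection(self, registry_client, record: dict) -> None:
    producer = self.create_producer(registry_client)
    producer.produce(
        topic=self.collection_topic,
        value=record,  # must match the registered '-value' schema
        on_delivery=lambda err, msg: print(err or msg.topic()),
    )
    producer.flush()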
def get_serializer(cls):
    """
    Get or create a single instance of the SUBSCRIPTION_LICENSE_MODIFIED
    signal serializer.

    :return: AvroSerializer
    """
    if cls.SERIALIZER is None:
        KAFKA_SCHEMA_REGISTRY_CONFIG = {
            'url': getattr(settings, 'SCHEMA_REGISTRY_URL', ''),
            'basic.auth.user.info':
                f"{getattr(settings, 'SCHEMA_REGISTRY_API_KEY', '')}"
                f":{getattr(settings, 'SCHEMA_REGISTRY_API_SECRET', '')}",
        }
        signal_serializer = AvroSignalSerializer(SUBSCRIPTION_LICENSE_MODIFIED)

        def inner_to_dict(event_data, ctx=None):  # pylint: disable=unused-argument
            return signal_serializer.to_dict(event_data)

        schema_registry_client = SchemaRegistryClient(KAFKA_SCHEMA_REGISTRY_CONFIG)
        cls.SERIALIZER = AvroSerializer(
            schema_str=signal_serializer.schema_string(),
            schema_registry_client=schema_registry_client,
            to_dict=inner_to_dict)
    return cls.SERIALIZER
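
# Hedged usage sketch for get_serializer above; `LicenseEventProducer` is an
# assumed name for the owning class, and the topic name and event object are
# placeholders. SerializationContext and MessageField are the real
# confluent_kafka.serialization types.
from confluent_kafka.serialization import MessageField, SerializationContext

serializer = LicenseEventProducer.get_serializer()
payload = serializer(
    license_modified_event,  # placeholder event data object
    SerializationContext('license-event-topic', MessageField.VALUE))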
def __init__(self, config: KafkaEmitterConfig):
    self.config = config
    schema_registry_conf = {
        "url": self.config.connection.schema_registry_url,
        **self.config.connection.schema_registry_config,
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
        return mce.to_obj(tuples=True)

    avro_serializer = AvroSerializer(
        schema_str=SCHEMA_JSON_STR,
        schema_registry_client=schema_registry_client,
        to_dict=convert_mce_to_dict,
    )

    producer_config = {
        "bootstrap.servers": self.config.connection.bootstrap,
        "key.serializer": StringSerializer("utf_8"),
        "value.serializer": avro_serializer,
        **self.config.connection.producer_config,
    }
    self.producer = SerializingProducer(producer_config)
def test_avro_record_serialization_custom(kafka_cluster):
    """
    Tests basic Avro serializer to_dict and from_dict object hook functionality.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    user = User('Bowie', 47, 'purple')
    value_serializer = AvroSerializer(
        sr, User.schema_str,
        lambda user, ctx: dict(name=user.name,
                               favorite_number=user.favorite_number,
                               favorite_color=user.favorite_color))
    value_deserializer = AvroDeserializer(
        sr, User.schema_str,
        lambda user_dict, ctx: User(**user_dict))

    producer = kafka_cluster.producer(value_serializer=value_serializer)
    producer.produce(topic, value=user, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    user2 = msg.value()

    assert user2 == user
def _make_serializer(self):
    return {
        SchemaType.AVRO: AvroSerializer(self.sr_client, AVRO_SCHEMA),
        SchemaType.PROTOBUF: ProtobufSerializer(ProtobufPayloadClass,
                                                self.sr_client),
    }[self.schema_type]
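
# Hedged usage sketch for _make_serializer above, inside the same class; the
# `serialize_payload` method name and placeholder topic are assumptions. Both
# Avro and Protobuf serializers share the (obj, SerializationContext) call
# signature, which is what makes the lookup table above work.
from confluent_kafka.serialization import MessageField, SerializationContext

def serialize_payload(self, payload_obj, topic="payload-topic"):
    ctx = SerializationContext(topic, MessageField.VALUE)
    return self._make_serializer()(payload_obj, ctx)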
def test_avro_serializer_config_use_latest_version(mock_schema_registry):
    """
    Ensures that with auto.register.schemas=False and use.latest.version=True
    the serializer requests the latest schema version instead of registering
    a new one.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic = "test-use-latest-version"
    subject = topic + '-key'

    test_serializer = AvroSerializer(test_client, 'string',
                                     conf={'auto.register.schemas': False,
                                           'use.latest.version': True})

    test_serializer("test", SerializationContext(topic, MessageField.KEY))

    register_count = test_client.counter['POST'].get(
        '/subjects/{}/versions'.format(subject), 0)
    assert register_count == 0
    # Ensure the latest version was requested
    assert test_client.counter['GET'].get(
        '/subjects/{}/versions/latest'.format(subject)) == 1
def __init__(self, producer_name, value_schema, groupID='KafkaAvroProducer',
             kafka_brokers="", kafka_user="", kafka_pwd="", kafka_cacert="",
             kafka_sasl_mechanism="", topic_name=""):
    self.kafka_brokers = kafka_brokers
    self.kafka_user = kafka_user
    self.kafka_pwd = kafka_pwd
    self.kafka_sasl_mechanism = kafka_sasl_mechanism
    self.kafka_cacert = kafka_cacert
    self.topic_name = topic_name
    # Producer name for logging purposes
    self.logging_prefix = '[' + producer_name + '][KafkaAvroProducer]'
    # Schema Registry configuration
    self.schema_registry_conf = {'url': config.SCHEMA_REGISTRY_URL}
    # Schema Registry client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)
    # String serializer for the key
    self.key_serializer = StringSerializer('utf_8')
    # Avro serializer for the value
    value_schema = value_schema.strip()
    self.value_serializer = AvroSerializer(value_schema,
                                           self.schema_registry_client)
    # Get the producer configuration
    self.producer_conf = self.getProducerConfiguration(
        groupID, self.key_serializer, self.value_serializer)
    # Create the producer
    self.producer = SerializingProducer(self.producer_conf)
def test_avro_serializer_multiple_topic_per_serializer_instance(
        mock_schema_registry):
    """
    Ensures the schema_id is correctly resolved when the same serializer
    instance is used for multiple topics.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic1 = "test-topic1"
    topic2 = "test-topic2"

    test_serializer = AvroSerializer("string", test_client,
                                     conf={'auto.register.schemas': False})

    def ensure_id_match(ctx):
        subject = "{}-{}".format(ctx.topic, ctx.field)
        expected_id = find_schema_id(subject)

        payload = test_serializer("test", ctx)
        _, schema_id = unpack('>bI', BytesIO(payload).read(5))
        assert schema_id == expected_id

    ensure_id_match(SerializationContext(topic1, MessageField.KEY))
    ensure_id_match(SerializationContext(topic2, MessageField.VALUE))
    ensure_id_match(SerializationContext(topic1, MessageField.KEY))

    # Ensure lookup_schema was invoked only once per schema
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1
def write_to_kafka(bootstrap_servers, schema_registry_url, topic_name, data):
    print("Kafka Version : ", confluent_kafka.version(), confluent_kafka.libversion())

    schema_registry_conf = {'url': schema_registry_url}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    value_avro_serializer = AvroSerializer(schemas.weather_source_schema,
                                           schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    conf = {'bootstrap.servers': bootstrap_servers,
            'client.id': socket.gethostname(),
            'on_delivery': delivery_report,
            'key.serializer': string_serializer,
            'value.serializer': value_avro_serializer}

    avroProducer = SerializingProducer(conf)
    # str() is required: a date object cannot be concatenated to a str.
    key = str(datetime.date.today()) + '~' + str(data['lat']) + '~' + str(data['lon'])
    message = json.dumps(data, cls=DatetimeEncoder)
    print("Key Type : ", type(key))
    print("Value Type : ", type(json.loads(message)))

    avroProducer.produce(topic=topic_name, key=key, value=json.loads(message))
    avroProducer.flush()
def _commit(self) -> None:
    """
    Retrieves the current offset by calling
    :meth:`pyconnect.pyconnectsource.PyConnectSource.get_index` and publishes
    it to the offset topic that is defined in this source's
    :class:`pyconnect.config.SourceConfig` instance.
    """
    idx = self.get_index()
    idx_schema = to_value_schema(idx)
    avro_value_serializer = AvroSerializer(
        schema_registry_client=self.schema_registry_client,
        schema_str=idx_schema)

    producer_config = {
        "bootstrap.servers": self.config["bootstrap.servers"],
        "key.serializer": None,
        "value.serializer": avro_value_serializer,
        **self.config["kafka_opts"],
        **self.config["kafka_producer_opts"],
    }
    offset_producer = SerializingProducer(producer_config)

    offset_producer.produce(key=None, value=idx,
                            topic=self.config["offset_topic"])
    offset_producer.flush()
def key_serializer(self):
    if not self._key_fields:
        return None
    return AvroSerializer(
        schema_str=self._record.key_schema_string,
        schema_registry_client=self._schema_registry_client
    )
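
# Hedged wiring sketch, inside the same class, showing where the serializer
# above might be consumed; the broker address and the `value_serializer`
# sibling are assumptions, and key_serializer is assumed to be exposed as a
# @property (a None key serializer simply disables key serialization).
def build_producer(self) -> SerializingProducer:
    return SerializingProducer({
        "bootstrap.servers": "localhost:9092",  # placeholder
        "key.serializer": self.key_serializer,
        "value.serializer": self.value_serializer,  # assumed sibling property
    })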
def main(args):
    topic = args.topic

    with open('schema/KeySchema.avsc', 'r') as f:
        key_schema_str = f.read()
    with open('schema/ValueSchema.avsc', 'r') as f:
        value_schema_str = f.read()

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_key_serializer = AvroSerializer(key_schema_str, schema_registry_client,
                                         user_quote_key_to_dict)
    avro_value_serializer = AvroSerializer(value_schema_str, schema_registry_client,
                                           user_quote_value_to_dict)

    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': avro_key_serializer,
                     'value.serializer': avro_value_serializer}

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_id = input("Enter User ID: ")
            product_id = input("Enter Product ID: ")
            quoted_price = input("Enter price: ")
            quoted_quantity = int(input("Enter the desired quantity: "))
            user_note = input("Enter additional note: ")

            user_quote_key = UserQuoteKey(user_id=int(user_id))
            user_quote_value = UserQuoteValue(product_id=int(product_id),
                                              quoted_price=int(quoted_price),
                                              quoted_quantity=quoted_quantity,
                                              user_note=user_note)

            producer.produce(topic=topic, key=user_quote_key,
                             value=user_quote_value,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
def test_avro_serializer_config_auto_register_schemas():
    """
    Ensures auto.register.schemas is applied
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client, 'string',
                                     conf={'auto.register.schemas': False})
    assert not test_serializer._auto_register
def test_avro_serializer_config_subject_name_strategy_invalid():
    """
    Ensures an invalid subject.name.strategy raises a ValueError
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    with pytest.raises(ValueError, match="must be callable"):
        AvroSerializer(test_client, 'int',
                       conf={'subject.name.strategy': dict()})
def test_avro_serializer_config_auto_register_schemas_invalid():
    """
    Ensures an invalid auto.register.schemas value raises a ValueError
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    with pytest.raises(ValueError, match="must be a boolean"):
        AvroSerializer(test_client, 'string',
                       conf={'auto.register.schemas': dict()})
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        # str(random.Random()) yields the object repr, giving each run a
        # unique-ish consumer group suffix.
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "latest",
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer,
    }
    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)
        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_registry_client,
                                     schema_str,
                                     user_to_dict)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer,
    }
    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_address = input("Enter address: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        address=user_address,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic, key=str(uuid4()), value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
def _create_schemas_if_necessary(self, key, value) -> None:
    """
    If no schemas have yet been created, this method will use the `key` and
    `value` instances to infer one.

    :param key: Key record to infer schema from.
    :param value: Value record to infer schema from.
    """
    if self._key_schema is None:
        self._key_schema = to_key_schema(key)
        avro_key_serializer = AvroSerializer(
            schema_registry_client=self.schema_registry_client,
            schema_str=self._key_schema)
        self._producer._key_serializer = avro_key_serializer
    if self._value_schema is None:
        self._value_schema = to_value_schema(value)
        avro_value_serializer = AvroSerializer(
            schema_registry_client=self.schema_registry_client,
            schema_str=self._value_schema)
        self._producer._value_serializer = avro_value_serializer
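
# Hedged call-site sketch for the helper above, inside the same class; the
# `produce` wrapper name and `self._topic` attribute are assumptions.
def produce(self, key, value) -> None:
    # Infer and attach serializers on first use, then delegate to the
    # underlying SerializingProducer.
    self._create_schemas_if_necessary(key, value)
    self._producer.produce(topic=self._topic, key=key, value=value)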
def test_avro_serializer_config_subject_name_strategy():
    """
    Ensures subject.name.strategy is applied
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(
        test_client, 'int',
        conf={'subject.name.strategy': record_subject_name_strategy})

    assert test_serializer._subject_name_func is record_subject_name_strategy
def __init__(self, bootstrap_servers: str, topic: str,
             schema_registry_url: str, schema: str, config=None):
    super().__init__(
        bootstrap_servers,
        topic,
        AvroSerializer(schema,
                       SchemaRegistryClient({"url": schema_registry_url})),
        config)
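
# Hedged instantiation sketch for the constructor above; the wrapper class
# name `AvroTopicProducer`, the addresses, and the schema literal are all
# placeholders, not names from the original snippet.
producer = AvroTopicProducer(
    bootstrap_servers="localhost:9092",
    topic="events",
    schema_registry_url="http://localhost:8081",
    schema='{"type": "string"}',
)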
def test_avro_serializer_preload_schema_id(mock_schema_registry):
    """
    Ensures the serializer does not reload the schema ID from the registry
    after the user has forced its preloading.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic1 = "test-topic1"
    topic2 = "test-topic2"

    test_serializer = AvroSerializer("string", test_client,
                                     conf={'auto.register.schemas': False})

    test_serializer.load_registry_schema_id(
        SerializationContext(topic1, MessageField.KEY))
    test_serializer.load_registry_schema_id(
        SerializationContext(topic2, MessageField.VALUE))

    # Ensure lookup_schema was invoked only once per schema
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1

    test_serializer("test", SerializationContext(topic1, MessageField.KEY))
    test_serializer("test", SerializationContext(topic2, MessageField.VALUE))

    # Ensure we did not query the schema registry again
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1
def test_delivery_report_serialization(kafka_cluster, load_avsc, avsc, data,
                                       record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized

    Raises:
        AssertionError on test failure
    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()
    schema_str = load_avsc(avsc)

    value_serializer = AvroSerializer(sr, schema_str)
    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        # Deserializers are called as (data, ctx), mirroring serializers.
        actual = value_deserializer(
            msg.value(), SerializationContext(topic, MessageField.VALUE))

        if record_type == "record":
            assert [v == actual[k] for k, v in data.items()]
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # schema may include defaults which need not exist in the original
    if record_type == 'record':
        assert [v == actual[k] for k, v in data.items()]
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data