def __init__(self):
    """Build an AvroProducer wired to the first configured Kafka topic.

    Loads broker/registry settings from ``self.CONFIG_FILE``, fetches the
    latest key and value schemas for the topic from the schema registry,
    and constructs the underlying ``AvroProducer``.

    Raises:
        ValueError: if either schema subject is missing from the registry.
    """
    config = self.load_config(self.CONFIG_FILE)
    sc = CachedSchemaRegistryClient(url=config['kafkaSchemaRegistryUrl'])
    # NOTE(review): only the first entry of kafkaTopics is produced to.
    self.topic = config['kafkaTopics'][0]
    # get_latest_schema returns (schema_id, schema, version); keep the schema.
    key_schema = sc.get_latest_schema(self.topic + "-key")[1]
    val_schema = sc.get_latest_schema(self.topic + "-value")[1]
    # Fix: previously None schemas were passed straight to AvroProducer,
    # deferring the failure to produce time. Fail fast with a clear message.
    if key_schema is None or val_schema is None:
        raise ValueError(
            'Schemas for topic "{}" not found in registry'.format(self.topic))
    self.producer = AvroProducer(
        {
            'bootstrap.servers': config['kafkaBootstrapServers'],
            'schema.registry.url': config['kafkaSchemaRegistryUrl']
        },
        default_key_schema=key_schema,
        default_value_schema=val_schema)
class EasyAvroProducer(BaseProducer, AvroProducer):
    """Avro producer for a single topic.

    Key/value schemas not supplied by the caller are resolved from the
    schema registry using the ``<topic>-key`` / ``<topic>-value`` subjects.
    """

    def __init__(self,
                 schema_registry_url: str,
                 kafka_brokers: List[str],
                 kafka_topic: str,
                 value_schema: schema.Schema = None,
                 key_schema: schema.Schema = None,
                 debug: bool = False,
                 kafka_conf: dict = None,
                 py_conf: dict = None) -> None:
        self.kafka_topic = kafka_topic
        self._client = CachedSchemaRegistryClient(
            dict(url=schema_registry_url))

        # Resolve any schema the caller did not provide (value first, as before).
        if value_schema is None:
            value_schema = self._lookup_schema(
                '{}-value'.format(self.kafka_topic))
        if key_schema is None:
            key_schema = self._lookup_schema(
                '{}-key'.format(self.kafka_topic))

        base_conf = {
            'bootstrap.servers': ','.join(kafka_brokers),
            'schema.registry.url': schema_registry_url,
            'client.id': self.__class__.__name__,
            'api.version.request': 'true',
        }
        if debug is True:
            base_conf['debug'] = 'msg'

        # Caller-supplied kafka_conf overrides the defaults above.
        super().__init__(
            {**base_conf, **(kafka_conf or {})},
            default_value_schema=value_schema,
            default_key_schema=key_schema,
            **(py_conf or {}))

    def _lookup_schema(self, subject):
        """Fetch the latest schema for *subject*, raising if it is absent."""
        _, found, _ = self._client.get_latest_schema(subject)
        if found is None:
            raise ValueError(
                'Schema "{}" not found in registry'.format(subject))
        return found
class EasyAvroProducer(AvroProducer):
    """AvroProducer bound to one topic, with registry-resolved schemas.

    When a key or value schema is not passed in, the latest version is
    pulled from the registry under ``<topic>-key`` / ``<topic>-value``.
    """

    def __init__(self,
                 schema_registry_url: str,
                 kafka_brokers: List[str],
                 kafka_topic: str,
                 value_schema: schema.Schema = None,
                 key_schema: schema.Schema = None,
                 debug: bool = False) -> None:
        self.kafka_topic = kafka_topic
        self._client = CachedSchemaRegistryClient(url=schema_registry_url)

        # Pull the value schema from the registry when not supplied.
        if value_schema is None:
            subject = '{}-value'.format(self.kafka_topic)
            _, value_schema, _ = self._client.get_latest_schema(subject)
            if value_schema is None:
                raise ValueError(
                    'Schema "{}" not found in registry'.format(subject))

        # Pull the key schema from the registry when not supplied.
        if key_schema is None:
            subject = '{}-key'.format(self.kafka_topic)
            _, key_schema, _ = self._client.get_latest_schema(subject)
            if key_schema is None:
                raise ValueError(
                    'Schema "{}" not found in registry'.format(subject))

        producer_conf = {
            'bootstrap.servers': ','.join(kafka_brokers),
            'schema.registry.url': schema_registry_url,
            'client.id': self.__class__.__name__,
            'api.version.request': 'true'
        }
        if debug:
            producer_conf['debug'] = 'msg'
        super().__init__(producer_conf,
                         default_value_schema=value_schema,
                         default_key_schema=key_schema)

    def produce(self, records: List[Tuple]) -> None:
        """Produce each (key, value) record to the bound topic, then flush."""
        total = len(records)
        for n, rec in enumerate(records, start=1):
            super().produce(topic=self.kafka_topic, key=rec[0], value=rec[1])
            L.info("{}/{} messages".format(n, total))
        L.debug("Flushing producer...")
        self.flush()
        L.info("Done producing")
class AvroSerializer(Serializer):
    """Avro serializer backed by a Confluent schema registry.

    Resolves (and optionally auto-registers) the writer schema for each
    record, then delegates the wire encoding to ``AvroSerDeBase``.
    """

    def __init__(
            self,
            schema_registry_url: str,
            auto_register_schemas: bool = True,
            subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
            **kwargs):
        """
        :param schema_registry_url: base URL of the schema registry.
        :param auto_register_schemas: when True, register each record's
            schema on first use; otherwise look up the latest registered one.
        :param subject_name_strategy: how registry subject names are derived
            (topic name, record name, or topic+record name).
        """
        super().__init__(**kwargs)
        # Fix: the original `schema_registry_url = schema_registry_url` was a
        # no-op self-assignment; keep the URL on the instance instead.
        self.schema_registry_url = schema_registry_url
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        """Derive the registry subject name for *schema* on *topic*."""
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ('-key' if is_key else '-value')
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = '{}-{}'.format(topic, schema.fullname)
        else:
            raise ValueError('Unknown SubjectNameStrategy')
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        """Return ``(schema_id, schema)``, registering the schema if allowed.

        With auto-registration the schema is re-fetched by id so the returned
        object is the registry's canonical copy.
        """
        subject = self._get_subject(topic, schema, is_key)
        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(
                subject)
        return schema_id, schema

    def serialize(self, value: AvroRecord, topic: str, is_key=False, **kwargs):
        """Encode *value* in the registry wire format (schema-id prefixed)."""
        schema_id, _ = self._ensure_schema(topic, value.schema, is_key)
        return self._serializer_impl.encode_record_with_schema_id(
            schema_id, value, is_key)
class AvroSerializerBase(Serializer):
    """Base class for registry-backed Avro serializers.

    Handles subject-name derivation and schema registration/lookup;
    subclasses implement :meth:`serialize` for the actual encoding.
    """

    def __init__(
        self,
        schema_registry_url: str,
        auto_register_schemas: bool = True,
        subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
        **kwargs,
    ):
        """
        :param schema_registry_url: base URL of the schema registry.
        :param auto_register_schemas: when True, register each record's
            schema on first use; otherwise look up the latest registered one.
        :param subject_name_strategy: how registry subject names are derived
            (topic name, record name, or topic+record name).
        """
        super().__init__(**kwargs)
        # Fix: the original `schema_registry_url = schema_registry_url` was a
        # no-op self-assignment; keep the URL on the instance instead.
        self.schema_registry_url = schema_registry_url
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        """Derive the registry subject name for *schema* on *topic*."""
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ("-key" if is_key else "-value")
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = "{}-{}".format(topic, schema.fullname)
        else:
            raise ValueError("Unknown SubjectNameStrategy")
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        """Return ``(schema_id, schema)``, registering the schema if allowed."""
        subject = self._get_subject(topic, schema, is_key)
        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(subject)
        return schema_id, schema

    @abc.abstractmethod
    def serialize(self, value, topic, **kwargs):
        """Encode *value* for *topic*; must be implemented by subclasses."""
        raise NotImplementedError
# Script fragment: fetch latest US COVID state data and prepare an Avro
# producer for Confluent Cloud. NOTE(review): the AvroProducer(...) call is
# truncated here (config dict never closed) — the remainder is outside this
# view. get_latest_schema(...)[1] is the schema object from the
# (schema_id, schema, version) tuple; presumably the subject 'covid-value'
# exists in the registry — no None check is done. Credentials come from
# cfg.pwd; broker/registry URLs are hard-coded.
confluentSchemaRegistrySecret = cfg.pwd['confluentSchemaRegistrySecret'] r = requests.get( 'https://covid-19api.com/api/states-latest?filter[country]=US') covid = r.json() client = CachedSchemaRegistryClient({ 'url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud', 'basic.auth.credentials.source': 'USER_INFO', 'basic.auth.user.info': f'{confluentSchemaRegistryKey}:{confluentSchemaRegistrySecret}' }) SavedSchema = client.get_latest_schema('covid-value')[1] p = AvroProducer( { 'bootstrap.servers': "pkc-41973.westus2.azure.confluent.cloud:9092", 'security.protocol': 'SASL_SSL', 'sasl.mechanism': 'PLAIN', 'sasl.username': confluentKey, 'sasl.password': confluentSecret, 'schema.registry.url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud',
# Script fragment: normalize a weather API response (fields whose names start
# with digits are renamed by WeatherAvroSchemaRenamer so they are valid Avro
# field names) and prepare a Confluent Cloud Avro producer.
# NOTE(review): the AvroProducer(...) call is truncated here (config dict
# never closed) — the remainder is outside this view. No None check on the
# 'weather-value' schema lookup; function-level import is mid-script.
randomWeather = r.json() # API returns json with names beginning with numbers. This function renames these fields. import functions.WeatherAvroSchemaRenamer as renamer randomWeather = renamer.rename(randomWeather) client = CachedSchemaRegistryClient({ 'url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud', 'basic.auth.credentials.source': 'USER_INFO', 'basic.auth.user.info': f'{confluentSchemaRegistryKey}:{confluentSchemaRegistrySecret}' }) SavedSchema = client.get_latest_schema('weather-value')[1] p = AvroProducer( { 'bootstrap.servers': "pkc-41973.westus2.azure.confluent.cloud:9092", 'security.protocol': 'SASL_SSL', 'sasl.mechanism': 'PLAIN', 'sasl.username': confluentKey, 'sasl.password': confluentSecret, 'schema.registry.url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud',
# Script fragment: poll the Open Notify ISS-position API in a loop and produce
# each reading to the 'ISS' topic (partition 0) using the registry's latest
# 'ISS-value' schema. NOTE(review): truncated here — the try: block's except
# clause lies outside this view, and there is no sleep visible between polls.
# Broker/registry endpoints are hard-coded plain-HTTP IPs; latitude/longitude
# arrive as strings and are cast to float for the Avro schema.
from requests import get from pprint import pprint from time import sleep from confluent_kafka.avro import AvroProducer from confluent_kafka.avro import CachedSchemaRegistryClient client = CachedSchemaRegistryClient({'url': 'http://13.82.6.66:8081'}) SavedSchema = client.get_latest_schema('ISS-value')[1] p = AvroProducer( { 'bootstrap.servers': "13.82.6.66:9092", 'schema.registry.url': 'http://13.82.6.66:8081', }, default_value_schema=SavedSchema) while True: r = get("http://api.open-notify.org/iss-now.json") ISS = r.json() message = { 'timestamp': ISS['timestamp'], 'latitude': float(ISS['iss_position']['latitude']), 'longitude': float(ISS['iss_position']['longitude']) } pprint(message) try: p.produce(topic='ISS', value=message, partition=0) p.flush()
# Script fragment: load Confluent Cloud credentials from generalconfig and
# prepare an Avro producer using the registry's latest 'Race-value' schema.
# NOTE(review): the AvroProducer(...) call is truncated here (config dict
# never closed) — the remainder is outside this view. No None check on the
# schema lookup; broker/registry URLs are hard-coded.
import generalconfig as cfg confluentKey = cfg.pwd['confluentKey'] confluentSecret = cfg.pwd['confluentSecret'] confluentSchemaRegistryKey = cfg.pwd['confluentSchemaRegistryKey'] confluentSchemaRegistrySecret = cfg.pwd['confluentSchemaRegistrySecret'] client = CachedSchemaRegistryClient({ 'url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud', 'basic.auth.credentials.source': 'USER_INFO', 'basic.auth.user.info': f'{confluentSchemaRegistryKey}:{confluentSchemaRegistrySecret}' }) SavedSchema = client.get_latest_schema('Race-value')[1] p = AvroProducer( { 'bootstrap.servers': "pkc-41973.westus2.azure.confluent.cloud:9092", 'security.protocol': 'SASL_SSL', 'sasl.mechanism': 'PLAIN', 'sasl.username': confluentKey, 'sasl.password': confluentSecret, 'schema.registry.url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud',