def setUp(self):
    """Register the test schemas and build the producer/consumer fixtures.

    The registry is addressed on port 4002 and the broker on port 4001 of
    the host named by ``EASYAVRO_TESTING_HOST`` (``localhost`` when unset).
    Key and value schemas are read from ``key.avsc`` / ``value.avsc`` in the
    directory derived from ``__file__`` and registered under
    ``<topic>-key`` and ``<topic>-value``.
    """
    self.testhost = os.environ.get('EASYAVRO_TESTING_HOST', 'localhost')
    registry_url = 'http://{}:4002'.format(self.testhost)
    broker = '{}:4001'.format(self.testhost)

    registry = CachedSchemaRegistryClient(url=registry_url)
    self.topic = 'easyavro-testing-topic'

    # presumably ap/dn/opj alias os.path.abspath/dirname/join -- confirm
    # against the file's imports.
    schema_dir = ap(dn(__file__))

    def load_schema(filename):
        """Parse the Avro schema stored in *filename* inside schema_dir."""
        with open(opj(schema_dir, filename), 'rt') as handle:
            return schema.Parse(handle.read())

    # Both files are loaded before anything is registered, so a missing or
    # invalid schema file aborts setUp without touching the registry.
    key_schema = load_schema('key.avsc')
    value_schema = load_schema('value.avsc')
    registry.register(self.topic + '-key', key_schema)
    registry.register(self.topic + '-value', value_schema)

    self.bp = EasyAvroProducer(
        schema_registry_url=registry_url,
        kafka_brokers=[broker],
        kafka_topic=self.topic,
    )
    self.bc = EasyAvroConsumer(
        schema_registry_url=registry_url,
        kafka_brokers=[broker],
        consumer_group='easyavro.testing',
        kafka_topic=self.topic,
        offset='earliest',
    )

    # NOTE(review): "recieved"/"on_recieve" are misspelt but kept as-is;
    # code outside this method may reference these attribute names.
    self.recieved = []

    def on_recieve(key: str, value: str) -> None:
        """Record one (key, value) pair and log that it arrived."""
        self.recieved.append((key, value))
        L.info("Recieved message")

    self.on_recieve = on_recieve
def update(topic, schema_config, force=False):
    """Register (create or update) the Avro schemas found for *topic*.

    Schema files are searched recursively below this module's directory:
    every ``*.avsc`` file when *topic* is ``'all'``, otherwise only files
    named ``<topic>-*.avsc``.

    Args:
        topic: Topic name used to select schema files, or ``'all'``.
        schema_config: Passed unchanged to ``CachedSchemaRegistryClient``.
        force: When truthy the subject's compatibility level is set to
            ``'NONE'`` before registering; otherwise to ``'BACKWARD'``.
    """
    client = CachedSchemaRegistryClient(schema_config)

    pattern = '**/*.avsc' if topic == 'all' else f'**/{topic}-*.avsc'
    compatibility = 'NONE' if force else 'BACKWARD'

    for schema_file in Path(__file__).parent.glob(pattern):
        with open(schema_file) as f:
            schema_str = f.read()

        schema_dict = json.loads(schema_str)
        avro_schema = schema.Parse(schema_str)

        # Subject is "<namespace with dots replaced by dashes>-<name>".
        namespace_part = schema_dict['namespace'].replace('.', '-')
        subject = namespace_part + '-' + schema_dict['name']

        client.update_compatibility(compatibility, subject=subject)

        try:
            schema_id = client.register(subject, avro_schema)
        except avro_error.ClientError as error:
            # A failed registration is logged and the remaining files are
            # still processed.
            log.error(f'Error adding/updating {schema_file}: {error.message}')
        else:
            log.info(f'Added/updated {schema_file}\t Schema ID {schema_id}')
class AvroSerializer(Serializer):
    """Serializer that encodes Avro records using a schema-registry schema id."""

    def __init__(
        self,
        schema_registry_url: str,
        auto_register_schemas: bool = True,
        subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
        **kwargs
    ):
        """Create the registry client and the underlying encoder.

        Args:
            schema_registry_url: Passed to ``CachedSchemaRegistryClient``.
            auto_register_schemas: When true, schemas are registered on use;
                otherwise the latest schema stored for the subject is used.
            subject_name_strategy: How the registry subject is derived
                (see ``_get_subject``).
            **kwargs: Forwarded to the base ``Serializer`` initializer.
        """
        super().__init__(**kwargs)
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        """Return the registry subject for *schema* on *topic*.

        * ``TopicNameStrategy``: ``<topic>-key`` or ``<topic>-value``
        * ``RecordNameStrategy``: ``schema.fullname``
        * ``TopicRecordNameStrategy``: ``<topic>-<schema.fullname>``

        Raises:
            ValueError: If the configured strategy is none of the above.
        """
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ('-key' if is_key else '-value')
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = '{}-{}'.format(topic, schema.fullname)
        else:
            raise ValueError('Unknown SubjectNameStrategy')
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        """Resolve the ``(schema_id, schema)`` pair to serialize with.

        With ``auto_register_schemas`` enabled, *schema* is registered under
        the subject and then fetched back by the returned id. Otherwise the
        latest schema stored for the subject is looked up.
        """
        subject = self._get_subject(topic, schema, is_key)
        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(subject)
        return schema_id, schema

    def serialize(self, value: AvroRecord, topic: str, is_key=False, **kwargs):
        """Encode *value* with the schema id resolved from ``value.schema``.

        Extra keyword arguments are accepted but not used here.
        """
        schema_id, _ = self._ensure_schema(topic, value.schema, is_key)
        return self._serializer_impl.encode_record_with_schema_id(
            schema_id, value, is_key)
class AvroSerializerBase(Serializer):
    """Shared schema-registry plumbing for Avro serializers.

    Subclasses provide ``serialize``; this class resolves registry subjects
    and schema ids.
    """

    def __init__(
        self,
        schema_registry_url: str,
        auto_register_schemas: bool = True,
        subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
        **kwargs,
    ):
        """Create the registry client and the underlying encoder.

        Args:
            schema_registry_url: Passed to ``CachedSchemaRegistryClient``.
            auto_register_schemas: When true, schemas are registered on use;
                otherwise the latest schema stored for the subject is used.
            subject_name_strategy: How the registry subject is derived
                (see ``_get_subject``).
            **kwargs: Forwarded to the base ``Serializer`` initializer.
        """
        super().__init__(**kwargs)
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        """Return the registry subject for *schema* on *topic*.

        * ``TopicNameStrategy``: ``<topic>-key`` or ``<topic>-value``
        * ``RecordNameStrategy``: ``schema.fullname``
        * ``TopicRecordNameStrategy``: ``<topic>-<schema.fullname>``

        Raises:
            ValueError: If the configured strategy is none of the above.
        """
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ("-key" if is_key else "-value")
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = "{}-{}".format(topic, schema.fullname)
        else:
            raise ValueError("Unknown SubjectNameStrategy")
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        """Resolve the ``(schema_id, schema)`` pair to serialize with.

        With ``auto_register_schemas`` enabled, *schema* is registered under
        the subject and then fetched back by the returned id. Otherwise the
        latest schema stored for the subject is looked up.
        """
        subject = self._get_subject(topic, schema, is_key)
        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(subject)
        return schema_id, schema

    @abc.abstractmethod
    def serialize(self, value, topic, **kwargs):
        """Serialize *value* for *topic*; must be implemented by subclasses."""
        raise NotImplementedError
# io.confluent.kafka.serializers.subject.RecordNameStrategy:
#   The subject name is the fully-qualified name of the Avro record type of
#   the message. The schema registry therefore checks compatibility per
#   record type, regardless of topic, which allows any number of different
#   event types in the same topic.
subject = schema.fullname  # == "my.test.value"

# io.confluent.kafka.serializers.subject.TopicRecordNameStrategy:
#   The subject name is <topic>-<type>, where <topic> is the Kafka topic name
#   and <type> is the fully-qualified name of the Avro record type of the
#   message. This also allows any number of event types in the same topic,
#   and further constrains the compatibility check to the current topic only.
# subject = topic + '-' + schema.fullname  # == "avro-python-producer-topic-my.test.value"

# Register the schema under the chosen subject; the registry returns its id.
schema_id = schema_registry.register(subject, schema)

# Produce five sample messages: raw UTF-8 keys, Avro-encoded values.
for i in range(5):
    key = "key-{}".format(i)
    value = "value-{}".format(i)
    record_value = avro_serde.encode_record_with_schema_id(
        schema_id=schema_id,
        record={"name": value, "type": "avro"},
        is_key=False,
    )
    producer.produce(topic, key=key.encode('utf-8'), value=record_value)
    print("Produced:", key, record_value)
def process_csv(csv):
    """Publish the particle releases described by CSV text as one Avro message.

    The release schema is registered under the subject
    ``mil-darpa-oot-particle-releases-value`` (after setting that subject's
    compatibility to ``NONE``). The CSV is then parsed and all rows are sent
    as a single keyless record with id ``website-run``.

    Args:
        csv: CSV text without a header row; columns are read as
            ``time``, ``lat``, ``lon``, ``nparticles``. (The parameter name
            hides the stdlib ``csv`` module inside this function.)

    Raises:
        ValueError: If the CSV yields no rows.
    """
    schema_dict = {
        "name": "mil.darpa.oot.particles.releases",
        "type": "record",
        "doc": "A particle release",
        "fields": [
            {
                "name": "id",
                "type": "string",
                "doc": "Unique particle release identifier",
            },
            {
                "name": "records",
                "type": {
                    "type": "array",
                    "items": {
                        "type": "record",
                        "name": "release",
                        "fields": [
                            {"name": "time", "type": "string", "doc": "ISO8601 Date String"},
                            {"name": "lat", "type": "double", "doc": "wgs84 latitude"},
                            {"name": "lon", "type": "double", "doc": "wgs84 longitude"},
                            {"name": "nparticles", "type": "int", "doc": "Number of particles released"},
                        ],
                    },
                },
            },
        ],
    }

    subject = 'mil-darpa-oot-particle-releases-value'
    registry_url = f'http://{kafka_base}:7002'

    # The registry is updated before the CSV is parsed, so these calls
    # happen even when the CSV later turns out to be empty or malformed.
    registry = CachedSchemaRegistryClient(url=registry_url)
    registry.update_compatibility('NONE', subject=subject)
    registry.register(subject, schema.Parse(json.dumps(schema_dict)))

    frame = pd.read_csv(
        StringIO(csv),
        header=None,
        names=['time', 'lat', 'lon', 'nparticles'],
        parse_dates=[0],
        infer_datetime_format=True,
    )

    releases = []
    for _, row in frame.iterrows():
        # The schema stores time as a string, so convert the parsed
        # timestamp to its ISO 8601 text form.
        row['time'] = row['time'].isoformat()
        releases.append(row.to_dict())

    if not releases:
        raise ValueError("No particles to run")

    # A single (key, value) pair; the key is None because the producer is
    # created with key_schema='nokey'.
    payload = [(None, {'id': 'website-run', 'records': releases})]

    producer = EasyAvroProducer(
        schema_registry_url=registry_url,
        kafka_brokers=[f'{kafka_base}:7001'],
        kafka_topic='mil-darpa-oot-particle-releases',
        key_schema='nokey',
    )
    producer.produce(payload)