def setUp(self):

        self.testhost = os.environ.get('EASYAVRO_TESTING_HOST', 'localhost')
        c = CachedSchemaRegistryClient(url='http://{}:4002'.format(self.testhost))

        self.topic = 'easyavro-testing-topic'

        rp = ap(dn(__file__))
        with open(opj(rp, 'key.avsc'), 'rt') as f:
            avro_key_schema = schema.Parse(f.read())
        with open(opj(rp, 'value.avsc'), 'rt') as f:
            avro_value_schema = schema.Parse(f.read())

        c.register(self.topic + '-key', avro_key_schema)
        c.register(self.topic + '-value', avro_value_schema)

        self.bp = EasyAvroProducer(
            schema_registry_url='http://{}:4002'.format(self.testhost),
            kafka_brokers=['{}:4001'.format(self.testhost)],
            kafka_topic=self.topic
        )

        self.bc = EasyAvroConsumer(
            schema_registry_url='http://{}:4002'.format(self.testhost),
            kafka_brokers=['{}:4001'.format(self.testhost)],
            consumer_group='easyavro.testing',
            kafka_topic=self.topic,
            offset='earliest'
        )

        self.received = []

        def on_receive(key: str, value: str) -> None:
            self.received.append((key, value))
            L.info("Received message")
        self.on_receive = on_receive
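
# The setUp fixture above relies on short aliases and imports that this excerpt
# omits; the preamble below is an assumption reconstructed from usage, not part
# of the original:
import logging
import os
from os.path import abspath as ap, dirname as dn, join as opj

from avro import schema
from confluent_kafka.avro import CachedSchemaRegistryClient

from easyavro import EasyAvroConsumer, EasyAvroProducer

L = logging.getLogger('easyavro-tests')  # logger name is hypothetical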
def update(topic, schema_config, force=False):
    """Given a topic, update (or create) a schema"""
    client = CachedSchemaRegistryClient(schema_config)

    if topic == 'all':
        schema_files = Path(__file__).parent.glob('**/*.avsc')
    else:
        schema_files = Path(__file__).parent.glob(f'**/{topic}-*.avsc')

    for schema_file in schema_files:
        with open(schema_file) as f:
            schema_str = f.read()
        schema_dict = json.loads(schema_str)
        avro_schema = schema.Parse(schema_str)

        subject = schema_dict['namespace'].replace('.', '-') + '-' + schema_dict['name']
        if force:
            client.update_compatibility('NONE', subject=subject)
        else:
            client.update_compatibility('BACKWARD', subject=subject)

        try:
            schema_id = client.register(subject, avro_schema)
            log.info(f'Added/updated {schema_file}\t Schema ID {schema_id}')
        except avro_error.ClientError as error:
            log.error(f'Error adding/updating {schema_file}: {error.message}')
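
# Hedged usage sketch for update() above; the registry URL is an assumption,
# and the dict-style config relies on CachedSchemaRegistryClient accepting a
# plain config mapping:
# update('all', {'url': 'http://localhost:8081'})
# update('easyavro-testing-topic', {'url': 'http://localhost:8081'}, force=True)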
class AvroSerializer(Serializer):
    def __init__(
            self,
            schema_registry_url: str,
            auto_register_schemas: bool = True,
            subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
            **kwargs):
        super().__init__(**kwargs)
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ('-key' if is_key else '-value')
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = '{}-{}'.format(topic, schema.fullname)
        else:
            raise ValueError('Unknown SubjectNameStrategy')
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        subject = self._get_subject(topic, schema, is_key)

        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(
                subject)

        return schema_id, schema

    def serialize(self, value: AvroRecord, topic: str, is_key=False, **kwargs):
        schema_id, _ = self._ensure_schema(topic, value.schema, is_key)
        return self._serializer_impl.encode_record_with_schema_id(
            schema_id, value, is_key)
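
# Hedged usage sketch for AvroSerializer; `record` stands in for an AvroRecord
# (assumed here to be a dict-like value that exposes its Avro schema via a
# `.schema` attribute, as serialize() requires):
#
# serializer = AvroSerializer(
#     'http://localhost:8081',
#     subject_name_strategy=SubjectNameStrategy.TopicNameStrategy,
# )
# encoded = serializer.serialize(record, topic='my-topic', is_key=False)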
Example #4
class AvroSerializerBase(Serializer):
    def __init__(
        self,
        schema_registry_url: str,
        auto_register_schemas: bool = True,
        subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ("-key" if is_key else "-value")
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = "{}-{}".format(topic, schema.fullname)
        else:
            raise ValueError("Unknown SubjectNameStrategy")
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        subject = self._get_subject(topic, schema, is_key)

        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(subject)

        return schema_id, schema

    @abc.abstractmethod
    def serialize(self, value, topic, **kwargs):
        raise NotImplementedError
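
# A minimal concrete subclass sketch; the AvroSerializer in the earlier example
# is a fuller version of the same idea:
#
# class MyAvroSerializer(AvroSerializerBase):
#     def serialize(self, value, topic, is_key=False, **kwargs):
#         schema_id, _ = self._ensure_schema(topic, value.schema, is_key)
#         return self._serializer_impl.encode_record_with_schema_id(
#             schema_id, value, is_key)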
Example #5
# io.confluent.kafka.serializers.subject.RecordNameStrategy:
# The subject name is the fully-qualified name of the Avro record type of the message.
# Thus, the schema registry checks the compatibility for a particular record type, regardless of topic.
# This setting allows any number of different event types in the same topic.

subject = schema.fullname  # == "my.test.value"

# io.confluent.kafka.serializers.subject.TopicRecordNameStrategy:
# The subject name is <topic>-<type>, where <topic> is the Kafka topic name, and <type> is the fully-qualified
# name of the Avro record type of the message. This setting also allows any number of event types in the same topic,
# and further constrains the compatibility check to the current topic only.

# subject = topic + '-' + schema.fullname # == "avro-python-producer-topic-my.test.value"

# register the schema under the subject and get back its schema id
schema_id = schema_registry.register(subject, schema)

for i in range(5):
    key = "key-" + str(i)
    value = "value-" + str(i)
    record_value = avro_serde.encode_record_with_schema_id(
        schema_id=schema_id,
        record={
            "name": value,
            "type": "avro"
        },
        is_key=False,
    )
    producer.produce(topic, key=key.encode('utf-8'), value=record_value)
    print("Produced:", key, record_value)
def process_csv(csv):

    schema_dict = {
        "name": "mil.darpa.oot.particles.releases",
        "type": "record",
        "doc": "A particle release",
        "fields": [
            { "name": "id", "type": "string", "doc": "Unique particle release identifier"},
            {
                "name": "records",
                "type": {
                    "type": "array",
                    "items": {
                        "type": "record",
                        "name": "release",
                        "fields": [
                            {"name": "time",       "type": "string",   "doc": "ISO8601 Date String"},
                            {"name": "lat",        "type": "double",   "doc": "wgs84 latitude"},
                            {"name": "lon",        "type": "double",   "doc": "wgs84 longitude"},
                            {"name": "nparticles", "type": "int",      "doc": "Number of particles released"}
                        ]
                    }
                }
            }
        ]
    }

    subject = 'mil-darpa-oot-particle-releases-value'
    client = CachedSchemaRegistryClient(url=f'http://{kafka_base}:7002')
    client.update_compatibility('NONE', subject=subject)

    avro_schema = schema.Parse(json.dumps(schema_dict))
    client.register(subject, avro_schema)

    df = pd.read_csv(
        StringIO(csv),
        header=None,
        names=['time', 'lat', 'lon', 'nparticles'],
        parse_dates=[0],
        infer_datetime_format=True
    )
    records_to_send = []
    for _, x in df.iterrows():
        x['time'] = x['time'].isoformat()
        records_to_send.append(x.to_dict())

    if not records_to_send:
        raise ValueError("No particles to run")

    to_send = [(
        None,
        {
            'id': 'website-run',
            'records': records_to_send
        }
    )]

    p = EasyAvroProducer(
        schema_registry_url=f'http://{kafka_base}:7002',
        kafka_brokers=[f'{kafka_base}:7001'],
        kafka_topic='mil-darpa-oot-particle-releases',
        key_schema='nokey'
    )
    p.produce(to_send)
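
# Hypothetical invocation with a minimal inline CSV (no header row; columns are
# time, lat, lon, nparticles, matching the read_csv call above):
# process_csv("2020-01-01T00:00:00,29.5,-87.2,1000\n"
#             "2020-01-01T01:00:00,29.6,-87.3,1000")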