Example #1
0
 def __init__(self, topic, kafkaConfig: AbstractKafkaConfig):
     """Create the Kafka producer, registry client and serializer for ``topic``.

     Args:
         topic: Name of the Kafka topic. May be ``None``, in which case no
             schema is looked up and the schema attributes stay ``None``.
         kafkaConfig: Supplies the broker address, the schema registry URL
             and the topic-to-schema-name mapping.
     """
     self.topic = topic
     self.kafkaConfig = kafkaConfig
     kafka = KafkaClient(kafkaConfig.getKafkaBrokerIp())
     self.producer = SimpleProducer(kafka)
     self.client = SchemaRegistryClient(
         url=kafkaConfig.getSchemaRegistryUrl())
     # NOTE(review): the positional ``False`` looks like the ``fast_avro``
     # flag that other call sites pass by keyword -- confirm.
     self.serializer = MessageSerializer(self.client, False)

     # Define the schema attributes unconditionally so that an instance
     # built without a topic exposes ``None`` instead of raising
     # AttributeError on first access.
     self.schemaName = None
     self.schema_id = None
     self.avro_schema = None
     self.schema_version = None
     if topic is not None:
         self.schemaName = kafkaConfig.getSchemaByTopicName(self.topic)
         (self.schema_id,
          self.avro_schema,
          self.schema_version) = self.client.get_latest_schema(
              self.schemaName)
Example #2
0
    def setUp(self):
        """Build two serializers backed by a fully mocked registry client.

        No registry is contacted: every client method used by the tests is
        replaced with a ``MagicMock`` returning schema id / version ``1`` and
        the parsed ``ADVANCED_SCHEMA``.
        """
        self.subject = 'test_adv'
        self.schema = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)

        self.client = SchemaRegistryClient('http://127.0.0.1:9001')
        self.client.register = MagicMock(return_value=1)
        # The keyword must be spelled ``return_value``: MagicMock accepts any
        # unknown keyword and merely stores it as an attribute, so a typo
        # silently leaves get_by_id() returning a bare MagicMock.
        self.client.get_by_id = MagicMock(return_value=self.schema)
        self.client.get_latest_schema = MagicMock(
            return_value=(1, self.schema, 1))
        self.client.get_version = MagicMock(return_value=1)

        # One serializer per Avro backend so tests can exercise both.
        self.ms = MessageSerializer(self.client, fast_avro=True)
        self.msslow = MessageSerializer(self.client, fast_avro=False)
        self.ms.get_schema = MagicMock(return_value=self.schema)
        self.msslow.get_schema = MagicMock(return_value=self.schema)
class KafkaSchemaRegistryUpdater(AbstractKafkaSchemaRegistryUpdater):
    """Registers the locally held schema with the schema registry on demand.

    All lookups go through ``SchemaRegistryClient.get_latest_schema``, whose
    result is unpacked as ``(schema_id, avro_schema, schema_version)``.
    """

    def __init__(self, kafkaConfig: AbstractKafkaConfig,
                 localSchemaHolder: LocalSchemaHolder):
        """Initialise the base class and create the registry client."""
        super().__init__(kafkaConfig, localSchemaHolder)
        registry_url = kafkaConfig.getSchemaRegistryUrl()
        self.client = SchemaRegistryClient(url=registry_url)

    def checkIfSchemaExists(self, schemaName):
        """Return True when the registry reports a schema id for ``schemaName``."""
        registered_id, _, _ = self.client.get_latest_schema(schemaName)
        return registered_id is not None

    def getKafkaSchema(self, schemaName):
        """Return the latest Avro schema the registry holds for ``schemaName``."""
        _, registered_schema, _ = self.client.get_latest_schema(schemaName)
        return registered_schema

    def getKafkaSchemaVersion(self, schemaName):
        """Return the latest schema version the registry holds for ``schemaName``."""
        _, _, registered_version = self.client.get_latest_schema(schemaName)
        return registered_version

    def haveToUpdateKafkaSchema(self):
        """Decide whether the local schema must be (re-)registered.

        Returns:
            True when the registry has no version for the local schema name,
            or when the local holder's ``schemaHasBeenModified`` flag equals
            ``True``; False otherwise.
        """
        locally_modified = self.localSchemaHolder.schemaHasBeenModified
        local_name = self.localSchemaHolder.schemaName
        registered_version = self.getKafkaSchemaVersion(local_name)

        if registered_version is None:
            return True

        # Equality with True (not mere truthiness) is kept on purpose so the
        # outcome matches the original for non-bool flag values.
        return bool(locally_modified == True)

    def updateKafkaSchema(self):
        """Register the local schema with the registry if an update is due."""
        if not self.haveToUpdateKafkaSchema():
            return

        subject = self.localSchemaHolder.schemaName
        parsed_schema = avro.schema.Parse(
            self.localSchemaHolder.schema.toJson())
        self.client.register(subject, parsed_schema)
class LocalSchemaLoaderFromKafka(AbstractLocalSchemaLoader):
    """Builds a ``LocalSchemaHolder`` from the schema stored in the registry."""

    def __init__(self, kafkaConfig: AbstractKafkaConfig, schemaName):
        """Store the config and create a registry client from its URL."""
        super().__init__(schemaName)
        self.kafkaConfig = kafkaConfig
        registry_url = kafkaConfig.getSchemaRegistryUrl()
        self.client = SchemaRegistryClient(url=registry_url)

    def loadLocalSchema(self):
        """Fetch the latest registry schema and wrap it in a holder.

        Returns:
            A ``LocalSchemaHolder`` constructed with ``False``, this loader's
            schema name and an ``AvroSchema`` populated from the registry
            schema's JSON form.

        Raises:
            Exception: If the registry returns no Avro schema for the name.
        """
        _, registry_schema, _ = self.client.get_latest_schema(self.schemaName)
        if registry_schema is None:
            raise Exception("Avro schema is not found, or does not exist!")

        # Imported at call time, as in the original -- presumably to avoid a
        # circular import; confirm before moving it to module level.
        from ..schema.AvroSchema import AvroSchema
        converter = AvroSchema(None, None)
        converted = converter.assignObject(registry_schema.to_json())
        return LocalSchemaHolder(False, self.schemaName, converted)
Example #5
0
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for ``MessageSerializer`` against a mocked registry.

    Every test runs against both the ``fast_avro=True`` serializer
    (``self.ms``) and the ``fast_avro=False`` one (``self.msslow``).
    """

    def setUp(self):
        """Build two serializers backed by a fully mocked registry client."""
        self.subject = 'test_adv'
        self.schema = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)

        self.client = SchemaRegistryClient('http://127.0.0.1:9001')
        self.client.register = MagicMock(return_value=1)
        # The keyword must be spelled ``return_value``: MagicMock accepts any
        # unknown keyword and merely stores it as an attribute, so a typo
        # silently leaves get_by_id() returning a bare MagicMock.
        self.client.get_by_id = MagicMock(return_value=self.schema)
        self.client.get_latest_schema = MagicMock(
            return_value=(1, self.schema, 1))
        self.client.get_version = MagicMock(return_value=1)
        self.ms = MessageSerializer(self.client, fast_avro=True)
        self.msslow = MessageSerializer(self.client, fast_avro=False)
        self.ms.get_schema = MagicMock(return_value=self.schema)
        self.msslow.get_schema = MagicMock(return_value=self.schema)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Assert ``message`` is framed correctly and decodes to ``expected``.

        The first five bytes are read as a big-endian signed byte (which must
        be 0) followed by an unsigned 32-bit schema id (which must equal
        ``schema_id``). The message must decode to ``expected`` with both
        serializers.
        """
        self.assertTrue(message)
        self.assertGreater(len(message), 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)
        decoded = self.msslow.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        """Encoding with an explicit schema id round-trips every record."""
        adv_schema_id = self.client.register(self.subject, self.schema)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(
                adv_schema_id, self.schema, record)
            self.assertMessageIsSame(message, record, adv_schema_id)
            message = self.msslow.encode_record_with_schema_id(
                adv_schema_id, self.schema, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_for_topic(self):
        """Encoding by topic name round-trips every record."""
        schema_id = self.client.register(self.subject, self.schema)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_for_topic(self.subject, record)
            self.assertMessageIsSame(message, record, schema_id)
            message = self.msslow.encode_record_for_topic(self.subject, record)
            self.assertMessageIsSame(message, record, schema_id)

    def test_encode_record_with_schema(self):
        """Encoding with an explicit schema object round-trips every record."""
        schema_id = self.client.register(self.subject, self.schema)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(
                self.subject, self.schema, record)
            self.assertMessageIsSame(message, record, schema_id)
            message = self.msslow.encode_record_with_schema(
                self.subject, self.schema, record)
            self.assertMessageIsSame(message, record, schema_id)

    def test_decode_record(self):
        """decode_message() restores the original record for both backends."""
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            encoded = self.ms.encode_record_with_schema(
                self.subject, self.schema, record)
            decoded = self.ms.decode_message(encoded)
            self.assertEqual(decoded, record)
            encoded = self.msslow.encode_record_with_schema(
                self.subject, self.schema, record)
            decoded = self.msslow.decode_message(encoded)
            self.assertEqual(decoded, record)

    def test_bad_input(self):
        """A str or list record raises SerializerError from every encoder."""
        adv_schema_id = self.client.register(self.subject, self.schema)

        with self.assertRaises(SerializerError):
            self.ms.encode_record_with_schema_id(adv_schema_id, self.schema,
                                                 'notadict')

        with self.assertRaises(SerializerError):
            self.ms.encode_record_with_schema_id(adv_schema_id, self.schema,
                                                 ['notadict'])

        with self.assertRaises(SerializerError):
            self.ms.encode_record_for_topic(self.subject, 'notadict')

        with self.assertRaises(SerializerError):
            self.ms.encode_record_for_topic(self.subject, ['notadict'])

        with self.assertRaises(SerializerError):
            self.ms.encode_record_with_schema(self.subject, self.schema,
                                              'notadict')

        with self.assertRaises(SerializerError):
            self.ms.encode_record_with_schema(self.subject, self.schema,
                                              ['notadict'])
Example #6
0
 def process_item(self, item, spider):
     """Avro-encode ``item`` and send it to the ``beer`` Kafka topic.

     Args:
         item: The scraped item; its field values are read from the
             instance's ``_values`` mapping.
         spider: The spider that produced the item (unused here).

     Returns:
         The unchanged ``item``. Scrapy item pipelines must return the item
         so later pipeline components receive it instead of ``None``.
     """
     client = SchemaRegistryClient(url='http://localhost:8081')
     _, avro_schema, _ = client.get_latest_schema('beerscraper')
     serializer = MessageSerializer(client)
     # NOTE(review): reads the item's internal ``_values`` storage directly;
     # presumably a scrapy.Item -- confirm before switching to dict(item).
     encoded = serializer.encode_record_with_schema(
         'beer', avro_schema, item.__dict__['_values'])
     self.producer.send('beer', encoded)
     return item
 def __init__(self, kafkaConfig: AbstractKafkaConfig,
              localSchemaHolder: LocalSchemaHolder):
     """Initialise the base class, then create the schema registry client.

     Args:
         kafkaConfig: Provides the schema registry URL.
         localSchemaHolder: Forwarded unchanged to the base initialiser.
     """
     super().__init__(kafkaConfig, localSchemaHolder)
     registry_url = kafkaConfig.getSchemaRegistryUrl()
     self.client = SchemaRegistryClient(url=registry_url)
 def __init__(self, kafkaConfig: AbstractKafkaConfig, schemaName):
     """Initialise the base class, keep the config and create the client.

     Args:
         kafkaConfig: Provides the schema registry URL; stored on the
             instance as ``kafkaConfig``.
         schemaName: Forwarded unchanged to the base initialiser.
     """
     super().__init__(schemaName)
     self.kafkaConfig = kafkaConfig
     registry_url = kafkaConfig.getSchemaRegistryUrl()
     self.client = SchemaRegistryClient(url=registry_url)
Example #9
0
 def setUp(self):
     """Start the mock registry server on port 9001 and point a client at it."""
     registry_thread = mock_registry.ServerThread(9001)
     self.server = registry_thread
     registry_thread.start()
     # Fixed one-second pause after starting the thread -- presumably to let
     # the server begin listening before the tests issue requests.
     time.sleep(1)
     self.client = SchemaRegistryClient('http://127.0.0.1:9001')
#
# uses datamountaineer libraries https://github.com/datamountaineer/python-serializers
# and the avro.schema library
#

from datamountaineer.schemaregistry.client import SchemaRegistryClient
from datamountaineer.schemaregistry.serializers import MessageSerializer, Util
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

# Read the Avro schema definition from disk.
# Replace 'beer.avsc' with your own input file or variable.
with open('beer.avsc') as f:
    schema_json = f.read()
schema = avro.schema.Parse(schema_json)

# Register the parsed schema under the 'beerscraper' subject; the value
# returned by register() is kept as schema_id.
client = SchemaRegistryClient(url='http://localhost:8081')
schema_id = client.register('beerscraper', schema)