Example #1
    def __init__(self,
                 schema_registry_url: str,
                 auto_register_schemas: bool = True,
                 subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
                 **kwargs):
        super().__init__(**kwargs)
        self.schema_registry_url = schema_registry_url
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)
Example #2
    def __init__(self):
        config = self.load_config(self.CONFIG_FILE)
        sc = CachedSchemaRegistryClient(url=config['kafkaSchemaRegistryUrl'])
        self.topic = config['kafkaTopics'][0]
        # get_latest_schema returns (schema_id, schema, version); take the schema.
        key_schema = sc.get_latest_schema(self.topic + "-key")[1]
        val_schema = sc.get_latest_schema(self.topic + "-value")[1]
        self.producer = AvroProducer(
            {
                'bootstrap.servers': config['kafkaBootstrapServers'],
                'schema.registry.url': config['kafkaSchemaRegistryUrl']
            },
            default_key_schema=key_schema,
            default_value_schema=val_schema)
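For context, a minimal sketch of the dict this constructor expects load_config to return; the key names are taken from the lookups above, the values are placeholders:

config = {
    "kafkaBootstrapServers": "localhost:9092",
    "kafkaSchemaRegistryUrl": "http://localhost:8081",
    "kafkaTopics": ["my-topic"],
}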
Example #3
class EasyAvroProducer(BaseProducer, AvroProducer):
    def __init__(self,
                 schema_registry_url: str,
                 kafka_brokers: List[str],
                 kafka_topic: str,
                 value_schema: schema.Schema = None,
                 key_schema: schema.Schema = None,
                 debug: bool = False,
                 kafka_conf: dict = None,
                 py_conf: dict = None) -> None:

        self.kafka_topic = kafka_topic
        self._client = CachedSchemaRegistryClient(
            dict(url=schema_registry_url))

        # Value Schema
        if value_schema is None:
            vs_name = '{}-value'.format(self.kafka_topic)
            _, value_schema, _ = self._client.get_latest_schema(vs_name)
            if value_schema is None:
                raise ValueError(
                    'Schema "{}" not found in registry'.format(vs_name))

        # Key Schema
        if key_schema is None:
            ks_name = '{}-key'.format(self.kafka_topic)
            _, key_schema, _ = self._client.get_latest_schema(ks_name)
            if key_schema is None:
                raise ValueError(
                    'Schema "{}" not found in registry'.format(ks_name))

        conf = {
            'bootstrap.servers': ','.join(kafka_brokers),
            'schema.registry.url': schema_registry_url,
            'client.id': self.__class__.__name__,
            'api.version.request': 'true',
        }

        if debug is True:
            conf['debug'] = 'msg'

        kafka_conf = kafka_conf or {}
        py_conf = py_conf or {}

        super().__init__(
            {**conf, **kafka_conf},
            default_value_schema=value_schema,
            default_key_schema=key_schema,
            **py_conf)
Example #4
class EasyAvroProducer(AvroProducer):
    def __init__(self,
                 schema_registry_url: str,
                 kafka_brokers: List[str],
                 kafka_topic: str,
                 value_schema: schema.Schema = None,
                 key_schema: schema.Schema = None,
                 debug: bool = False) -> None:

        self.kafka_topic = kafka_topic
        self._client = CachedSchemaRegistryClient(url=schema_registry_url)

        # Value Schema
        if value_schema is None:
            vs_name = '{}-value'.format(self.kafka_topic)
            _, value_schema, _ = self._client.get_latest_schema(vs_name)
            if value_schema is None:
                raise ValueError(
                    'Schema "{}" not found in registry'.format(vs_name))

        # Key Schema
        if key_schema is None:
            ks_name = '{}-key'.format(self.kafka_topic)
            _, key_schema, _ = self._client.get_latest_schema(ks_name)
            if key_schema is None:
                raise ValueError(
                    'Schema "{}" not found in registry'.format(ks_name))

        conf = {
            'bootstrap.servers': ','.join(kafka_brokers),
            'schema.registry.url': schema_registry_url,
            'client.id': self.__class__.__name__,
            'api.version.request': 'true'
        }
        if debug:
            conf['debug'] = 'msg'
        super().__init__(conf,
                         default_value_schema=value_schema,
                         default_key_schema=key_schema)

    def produce(self, records: List[Tuple]) -> None:
        for i, r in enumerate(records):
            super().produce(topic=self.kafka_topic, key=r[0], value=r[1])
            L.info("{}/{} messages".format(i + 1, len(records)))

        L.debug("Flushing producer...")
        self.flush()
        L.info("Done producing")
Example #5
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Configure the broker properties
        self.broker_properties = {
            "BROKER_URL": "PLAINTEXT://localhost:9092",
            "SCHEMA_REGISTRY_URL": "http://localhost:8081",
            "group.id": f"{self.topic_name}",
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            {"bootstrap.servers": self.broker_properties["BROKER_URL"]},
            schema_registry=CachedSchemaRegistryClient(
                {"url": self.broker_properties["SCHEMA_REGISTRY_URL"]}))
Example #6
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest
        self.BROKER_URL = 'PLAINTEXT://localhost:9092'
        self.SCHEMA_REGISTRY_URL = 'http://localhost:8081'

        self.broker_properties = {
            "bootstrap.servers": self.BROKER_URL,
            "group.id": "0"
        }

        if is_avro is True:
            schema_registry = CachedSchemaRegistryClient(
                {"url": self.SCHEMA_REGISTRY_URL})
            self.consumer = AvroConsumer(self.broker_properties,
                                         schema_registry=schema_registry)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
Example #7
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {"bootstrap.servers": KAFKA_BROKER_URL}

        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        schema_registry = CachedSchemaRegistryClient("http://localhost:8081")

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema,
                                     schema_registry=schema_registry)
Example #8
async def consume(topic_name):
    """Consumes data from the Kafka Topic"""
    #
    #     Create a CachedSchemaRegistryClient
    #
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    #
    #     Use the Avro Consumer
    #
    c = AvroConsumer(
        {
            "bootstrap.servers": BROKER_URL,
            "group.id": "0"
        },
        schema_registry=schema_registry,
    )
    c.subscribe([topic_name])
    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            try:
                # Print the deserialized message value to the console.
                print(message.value())
            except KeyError as e:
                print(f"Failed to unpack message {e}")
        await asyncio.sleep(1.0)
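Since consume is a coroutine, it needs an event loop; a minimal driver sketch, assuming BROKER_URL and SCHEMA_REGISTRY_URL are module-level constants as in the example (the topic name is a placeholder):

import asyncio

try:
    asyncio.run(consume("com.example.clickevents"))
except KeyboardInterrupt:
    print("shutting down")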
Example #9
async def consume(topic_name):
    """Consumes data from the Kafka Topic"""
    # Create a CachedSchemaRegistryClient.
    schema_registry = CachedSchemaRegistryClient(SCHEMA_REGISTRY_URL)

    # A plain Consumer is used here; the AvroConsumer alternative is kept for
    # reference. See:
    # https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroConsumer
    # c = AvroConsumer({"bootstrap.servers": BROKER_URL, "group.id": "0", "auto.offset.reset": "earliest"}, schema_registry=schema_registry)
    c = Consumer({
        "bootstrap.servers": BROKER_URL,
        "group.id": "0",
        "auto.offset.reset": 'earliest'
    })
    c.subscribe([topic_name])
    while True:
        try:
            msg = c.poll(1.0)
        except SerializerError as e:
            print("Message deserialization failed: {}".format(e))
            raise
        if msg is None:
            print("no message received by consumer")
        elif msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            return
        else:
            key, value = msg.key(), msg.value()
            print(key, value)
        await asyncio.sleep(1.0)
Example #10
    def __init__(self,
                 topic_name_pattern,
                 message_handler,
                 is_avro=True,
                 offset_earliest=True):
        """Creates a consumer object for asynchronous use"""
        self._topic_name_pattern = topic_name_pattern
        self._message_handler = message_handler
        self._offset_earliest = offset_earliest

        conf = {
            "bootstrap.servers": config["KAFKA"]["BROKER_URL"],
            "group.id": "0",
            "auto.offset.reset": "earliest"
        }
        if is_avro is True:
            schema_registry = CachedSchemaRegistryClient(
                {"url": config["KAFKA"]["SCHEMA_REGISTRY_URL"]})
            self._consumer = AvroConsumer(conf,
                                          schema_registry=schema_registry)
        else:
            self._consumer = Consumer(conf)

        self._consumer.subscribe([self._topic_name_pattern],
                                 on_assign=self._on_assign)

        self._timeout = float(config["PARAM"]["CONSUMER_POLL_TIMEOUT"])
Example #11
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            'BROKER_URL': 'PLAINTEXT://localhost:9092',
            'SCHEMA_REGISTRY': 'http://localhost:8081',
        }

        schema_registry = CachedSchemaRegistryClient({
            'url': self.broker_properties['SCHEMA_REGISTRY'],
            'ssl.ca.location': None,
            'ssl.certificate.location': None,
            'ssl.key.location': None,
        })

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(
                {
                    "bootstrap.servers": self.broker_properties['BROKER_URL'],
                    "group.id": "0"
                },
                schema_registry=schema_registry)
        else:
            self.consumer = Consumer({
                "bootstrap.servers": self.broker_properties['BROKER_URL'],
                "group.id": "0",
                "auto.offset.reset": "earliest"
            })

        # Subscribe to the topic pattern; `on_assign` runs when partitions are
        # assigned, before consumption starts.
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
Example #12
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Broker properties; BROKER_URL is the host URL for Kafka from the
        # project README.
        self.broker_properties = {
            'bootstrap.servers': BROKER_URL,
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            self.broker_properties,
            schema_registry=CachedSchemaRegistryClient(SCHEMA_REGISTRY_URL))
Example #13
async def converter(CONSUME_TOPIC, PRODUCE_TOPIC, BROKER_URL,
                    SCHEMA_REGISTRY_URL):
    """Consumes data from the Kafka Topic
    """
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    c = AvroConsumer(
        {
            "bootstrap.servers": BROKER_URL,
            "client.id": "project-insight",
            "group.id": "convertor-in-consumer",
            "auto.offset.reset": "earliest",
        },
        schema_registry=schema_registry,
    )
    c.subscribe([CONSUME_TOPIC])

    p = Producer({"bootstrap.servers": BROKER_URL})

    while True:
        message = c.poll(1.0)
        if message is None:
            logger.info("no message received by consumer")
        elif message.error() is not None:
            logger.error(f"error from consumer {message.error()}")
        else:
            try:
                print(message.value())
                p.produce(topic=PRODUCE_TOPIC,
                          key=str(uuid4()),
                          value=json.dumps(message.value()))

            except KeyError as e:
                logger.error(f"Failed to unpack message {e}")
        await asyncio.sleep(0.01)
Example #14
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic(self.topic_name)
            Producer.existing_topics.add(self.topic_name)

        schema_registry = CachedSchemaRegistryClient(SCHEMA_REGISTRY_URL)
        self.producer = AvroProducer(
            self.broker_properties,
            schema_registry=schema_registry
        )
Example #15
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    # Create a CachedSchemaRegistryClient pointed at SCHEMA_REGISTRY_URL.
    # See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/cached_schema_registry_client.py#L47
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    # Use an AvroProducer so values are serialized against a registered schema.
    # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
    p = AvroProducer({"bootstrap.servers": BROKER_URL},
                     schema_registry=schema_registry)
    while True:
        # Produce with an explicit value schema; the ClickEvent is serialized
        # with `asdict(ClickEvent())`.
        p.produce(
            topic=topic_name,
            value=asdict(ClickEvent()),
            value_schema=ClickEvent.schema)
        await asyncio.sleep(1.0)
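Several of these examples serialize a ClickEvent with asdict() and pass ClickEvent.schema; a sketch of what such a dataclass might look like (the fields and schema here are assumptions, not taken from the original project):

from dataclasses import dataclass
from confluent_kafka import avro

@dataclass
class ClickEvent:
    email: str = "user@example.com"
    uri: str = "/"

    # Class-level attribute (not a dataclass field): the Avro schema
    # matching the fields above.
    schema = avro.loads("""
    {
        "type": "record",
        "name": "click_event",
        "namespace": "com.example",
        "fields": [
            {"name": "email", "type": "string"},
            {"name": "uri", "type": "string"}
        ]
    }
    """)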
Example #16
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)
        else:
            logger.debug("Topic already exists: %s", self.topic_name)

        schema_registry = CachedSchemaRegistryClient(
            {"url": SCHEMA_REGISTRY_URL})
        self.producer = AvroProducer({"bootstrap.servers": BROKER_URL},
                                     schema_registry=schema_registry)
Example #17
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        schema_registry = CachedSchemaRegistryClient(self.SCHEMA_REGISTRY_URL)

        self.broker_properties = {
            "bootstrap.servers": self.BROKER_URL,
            "linger.ms": 500,
            "acks": 1,
            "retries": 3,
            "message.max.bytes": 4096,
            "batch.num.messages": 10
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer: AvroProducer = AvroProducer(
            self.broker_properties, schema_registry=schema_registry)
Example #18
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        self.client = AdminClient({"bootstrap.servers": BROKER_URL})

        # Require one fewer ack than replicas once there are more than two.
        acks = (self.num_replicas - 1) if self.num_replicas > 2 else self.num_replicas
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "client.id": f"producer_client_id_{self.topic_name.strip().replace(' ', '')}",
            "acks": acks
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        schema_registry = CachedSchemaRegistryClient(
            {"url": SCHEMA_REGISTRY_URL})

        self.producer = AvroProducer(self.broker_properties,
                                     schema_registry=schema_registry)
Example #19
async def consume(topic_name):
    """Consumes data from the Kafka Topic"""
    # Create a CachedSchemaRegistryClient.
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    # Use the AvroConsumer so values are deserialized with their registered schema.
    # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroConsumer
    c = AvroConsumer(
        {
            "bootstrap.servers": BROKER_URL,
            "group.id": "0"
        },
        schema_registry=schema_registry,
    )
    c.subscribe([topic_name])
    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            try:
                print(message.value())
            except KeyError as e:
                print(f"Failed to unpack message {e}")
        await asyncio.sleep(1.0)
Example #20
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        # configure kafka registry
        self.schema_registry = CachedSchemaRegistryClient(KAFKA_REGISTRY_URL)

        self.broker_properties = {'bootstrap.servers': KAFKA_BROKER_URL}

        self.client = AdminClient(self.broker_properties)
        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema,
                                     schema_registry=self.schema_registry)
Example #21
    def __init__(self,
                 config,
                 schema_registry=None,
                 reader_key_schema=None,
                 reader_value_schema=None):

        sr_conf = {
            key.replace("schema.registry.", ""): value
            for key, value in config.items()
            if key.startswith("schema.registry")
        }

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            sr_conf['sasl.mechanisms'] = config.get('sasl.mechanisms', '')
            sr_conf['sasl.username'] = config.get('sasl.username', '')
            sr_conf['sasl.password'] = config.get('sasl.password', '')

        ap_conf = {
            key: value
            for key, value in config.items()
            if not key.startswith("schema.registry")
        }

        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url", None) is not None:
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )

        super(StringAvroConsumer, self).__init__(ap_conf)
        self._serializer = MessageSerializer(schema_registry,
                                             reader_key_schema,
                                             reader_value_schema)
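A configuration sketch for this consumer: registry options travel in the same dict under a schema.registry. prefix and are split out by the constructor above (all values are placeholders):

consumer = StringAvroConsumer({
    "bootstrap.servers": "localhost:9092",
    "group.id": "string-avro-group",
    "auto.offset.reset": "earliest",
    "schema.registry.url": "http://localhost:8081",
})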
Example #22
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9092"
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            self.broker_properties,
            schema_registry=CachedSchemaRegistryClient('http://localhost:8081'),
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema
        )
Example #23
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        schema_registry = CachedSchemaRegistryClient(SCHEMA_REGISTRY_URL)
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "group.id": "0",
            "auto.offset.reset": "earliest"
        }

        if is_avro is True:
            # Pass the registry client directly; also setting
            # `schema.registry.url` in the consumer config would make
            # AvroConsumer raise a ValueError.
            self.consumer = AvroConsumer(
                self.broker_properties,
                schema_registry=schema_registry
            )
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern])
Example #24
def update(topic, schema_config, force=False):
    """Given a topic, update (or create) a schema"""
    client = CachedSchemaRegistryClient(schema_config)

    if topic == 'all':
        schema_files = Path(__file__).parent.glob('**/*.avsc')
    else:
        schema_files = Path(__file__).parent.glob(f'**/{topic}-*.avsc')

    for schema_file in schema_files:
        with open(schema_file) as f:
            schema_str = f.read()
        schema_dict = json.loads(schema_str)
        avro_schema = schema.Parse(schema_str)

        subject = schema_dict['namespace'].replace('.', '-') + '-' + schema_dict['name']
        if force:
            client.update_compatibility('NONE', subject=subject)
        else:
            client.update_compatibility('BACKWARD', subject=subject)

        try:
            schema_id = client.register(subject, avro_schema)
            log.info(f'Added/updated {schema_file}\t Schema ID {schema_id}')
        except avro_error.ClientError as error:
            log.error(f'Error adding/updating {schema_file}: {error.message}')
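A hedged invocation sketch for update, assuming the registry client accepts a {"url": ...} config as elsewhere in these examples (the topic name is a placeholder):

schema_config = {"url": "http://localhost:8081"}
update("all", schema_config)  # register every *.avsc found
update("clickevents", schema_config, force=True)  # disable compatibility checks first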
Example #25
async def produce(broker_url, topic, schema_registry_url, *, num_messages):
    schema_registry = CachedSchemaRegistryClient({"url": schema_registry_url})
    conf = {"bootstrap.servers": broker_url, "client.id": socket.gethostname()}
    p = AvroProducer(conf, schema_registry=schema_registry)
    for _ in range(num_messages):
        p.produce(topic=topic,
                  value=asdict(ClickEvent()),
                  value_schema=ClickEvent.schema)
        await asyncio.sleep(1.0)
Example #26
async def produce(topic_name):
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})
    p = AvroProducer({"bootstrap.servers": BROKER_URL}, schema_registry=schema_registry)
    while True:
        p.produce(
            topic=topic_name, 
            value=asdict(ClickEvent()),
            value_schema=ClickEvent.schema
        )
        await asyncio.sleep(1.0)
Example #27
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        # Broker properties; the host URLs for Kafka and Schema Registry come
        # from the project README.
        self.broker_properties = {
            "KAFKA_BROKER_URL": "PLAINTEXT://localhost:9092",
            "group.id": "groupid"
        }

        if self.offset_earliest:
            self.broker_properties['auto.offset.reset'] = 'earliest'

        # Create the Consumer, using the appropriate type.
        consumer_config = {
            "bootstrap.servers": self.broker_properties.get("KAFKA_BROKER_URL"),
            "group.id": self.broker_properties.get("group.id")
        }
        if "auto.offset.reset" in self.broker_properties:
            consumer_config["auto.offset.reset"] = self.broker_properties["auto.offset.reset"]

        if is_avro is True:
            self.broker_properties["SCHEMA_REGISTRY_URL"] = "http://localhost:8081"
            self.schema_registry = CachedSchemaRegistryClient(
                self.broker_properties.get("SCHEMA_REGISTRY_URL"))
            self.consumer = AvroConsumer(config=consumer_config,
                                         schema_registry=self.schema_registry)
        else:
            self.consumer = Consumer(consumer_config)

        # Subscribe to the topic pattern, invoking `on_assign` when partitions
        # are assigned.
        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)
Example #28
class AvroSerializer(Serializer):
    def __init__(
            self,
            schema_registry_url: str,
            auto_register_schemas: bool = True,
            subject_name_strategy: SubjectNameStrategy = SubjectNameStrategy.RecordNameStrategy,
            **kwargs):
        super().__init__(**kwargs)
        self.schema_registry_url = schema_registry_url
        self.schema_registry = CachedSchemaRegistryClient(schema_registry_url)
        self.auto_register_schemas = auto_register_schemas
        self.subject_name_strategy = subject_name_strategy
        self._serializer_impl = AvroSerDeBase(self.schema_registry)

    def _get_subject(self, topic: str, schema, is_key=False):
        if self.subject_name_strategy == SubjectNameStrategy.TopicNameStrategy:
            subject = topic + ('-key' if is_key else '-value')
        elif self.subject_name_strategy == SubjectNameStrategy.RecordNameStrategy:
            subject = schema.fullname
        elif self.subject_name_strategy == SubjectNameStrategy.TopicRecordNameStrategy:
            subject = '{}-{}'.format(topic, schema.fullname)
        else:
            raise ValueError('Unknown SubjectNameStrategy')
        return subject

    def _ensure_schema(self, topic: str, schema, is_key=False):
        subject = self._get_subject(topic, schema, is_key)

        if self.auto_register_schemas:
            schema_id = self.schema_registry.register(subject, schema)
            schema = self.schema_registry.get_by_id(schema_id)
        else:
            schema_id, schema, _ = self.schema_registry.get_latest_schema(
                subject)

        return schema_id, schema

    def serialize(self, value: AvroRecord, topic: str, is_key=False, **kwargs):
        schema_id, _ = self._ensure_schema(topic, value.schema, is_key)
        return self._serializer_impl.encode_record_with_schema_id(
            schema_id, value, is_key)
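A usage sketch for this serializer, assuming record is an AvroRecord whose .schema attribute holds its parsed Avro schema, as serialize() above expects (names are placeholders):

serializer = AvroSerializer(schema_registry_url="http://localhost:8081")
payload = serializer.serialize(record, topic="user-events")  # bytes ready for produce()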
Example #29
    def __init__(self, group_id=None):
        """Initializes a Producer object with basic settings"""

        schema_registry = CachedSchemaRegistryClient({"url": KAFKA_SCHEMA_REGISTRY_URL})

        producer_config = {"bootstrap.servers": KAFKA_BOOTSTRAP_SERVERS}
        if group_id:
            producer_config["client.id"] = group_id

        self.producer = AvroProducer(producer_config, schema_registry=schema_registry)
        logger.debug(f"Created producer with id {group_id}")
Example #30
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    schema_registry = CachedSchemaRegistryClient(url=SCHEMA_REGISTRY_URL)

    p = AvroProducer(config={"bootstrap.servers": BROKER_URL},
                     schema_registry=schema_registry)
    while True:
        p.produce(topic=topic_name,
                  value=asdict(ClickEvent()),
                  value_schema=ClickEvent().serialize_schema())
        await asyncio.sleep(1.0)