from confluent_kafka.avro import AvroConsumer


def create_kafka_consumer(group_id='nsb-salgsdvh', autoCommit=False, environment=None):
    # `dbutils` is provided by the Databricks runtime.
    username = '******'
    if environment == 'PROD':
        password = dbutils.secrets.get('KAFKA_ENTUR_PROD', 'password')
        bootstrap_servers = 'bootstrap.prod-ext.kafka.entur.io:9095'
        schema_registry_url = 'http://schema-registry.prod-ext.kafka.entur.io:8001'
    elif environment == 'TEST':
        password = dbutils.secrets.get('KAFKA_ENTUR', 'password')
        bootstrap_servers = 'bootstrap.test-ext.kafka.entur.io:9095'
        schema_registry_url = 'http://schema-registry.test-ext.kafka.entur.io:8001'
    else:
        raise ValueError("environment must be 'PROD' or 'TEST'")

    config = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': group_id,
        'enable.auto.commit': autoCommit,
        'auto.offset.reset': 'earliest',
        'schema.registry.url': schema_registry_url,
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'SCRAM-SHA-512',
        'sasl.username': username,
        'sasl.password': password
    }
    return AvroConsumer(config)
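# Usage sketch (assumption, not from the source): consuming with the factory above.
# Since enable.auto.commit defaults to False here, offsets are committed manually
# after each processed message. The topic name and `handle` are placeholders.
consumer = create_kafka_consumer(environment='TEST')
consumer.subscribe(['example-topic'])
try:
    while True:
        msg = consumer.poll(1)
        if msg is None:
            continue
        if msg.error():
            continue
        handle(msg.value())  # hypothetical handler
        consumer.commit(message=msg, asynchronous=False)
finally:
    consumer.close()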
def __init__(
    self,
    topic_name_pattern,
    message_handler,
    is_avro=True,
    offset_earliest=False,
    sleep_secs=1.0,
    consume_timeout=0.1,
):
    """Creates a consumer object for asynchronous use"""
    self.topic_name_pattern = topic_name_pattern
    self.message_handler = message_handler
    self.sleep_secs = sleep_secs
    self.consume_timeout = consume_timeout
    self.offset_earliest = offset_earliest

    self.broker_properties = {
        'bootstrap.servers': 'PLAINTEXT://localhost:9094',
        'group.id': topic_name_pattern,
        'default.topic.config': {'auto.offset.reset': 'earliest'}
    }

    # Create the consumer, using the appropriate type.
    if is_avro is True:
        self.broker_properties["schema.registry.url"] = "http://localhost:8081"
        self.consumer = AvroConsumer(self.broker_properties)
    else:
        self.consumer = Consumer(self.broker_properties)

    # Subscribe to the topics, invoking `on_assign` when partitions are assigned.
    self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)
def __init__(
    self,
    topic_name_pattern,
    message_handler,
    is_avro=True,
    offset_earliest=False,
    sleep_secs=1.0,
    consume_timeout=0.1,
):
    """Creates a consumer object for asynchronous use"""
    self.topic_name_pattern = topic_name_pattern
    self.message_handler = message_handler
    self.sleep_secs = sleep_secs
    self.consume_timeout = consume_timeout
    self.offset_earliest = offset_earliest

    self.broker_properties = {
        "bootstrap.servers": "PLAINTEXT://localhost:9092,PLAINTEXT://localhost:9093,PLAINTEXT://localhost:9094",
        "group.id": f"{self.topic_name_pattern}",
        "auto.offset.reset": "earliest" if offset_earliest else "latest"
    }

    # Create the consumer, using the appropriate type.
    if is_avro is True:
        self.broker_properties["schema.registry.url"] = "http://localhost:8081"
        self.consumer = AvroConsumer(self.broker_properties)
    else:
        self.consumer = Consumer(self.broker_properties)

    self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)
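# A possible `on_assign` callback for the consumers above (a sketch, assuming the
# intent is to rewind assigned partitions to the beginning when offset_earliest is
# set; confluent-kafka calls the callback with (consumer, partitions) on assignment).
from confluent_kafka import OFFSET_BEGINNING

def on_assign(self, consumer, partitions):
    if self.offset_earliest:
        for partition in partitions:
            partition.offset = OFFSET_BEGINNING
    consumer.assign(partitions)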
def consume(topic, conf):
    """
    Consume User records
    """
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.avro.serializer import SerializerError

    print("Consuming user records from topic {} with group {}. ^c to exit.".format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=record_schema)
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)

            # There were no messages on the queue, continue polling
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = User(msg.value())
            print("name: {}\n\tfavorite_number: {}\n\tfavorite_color: {}\n".format(
                record.name, record.favorite_number, record.favorite_color))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
def confluent_kafka_consumer(args):
    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'group.id': str(uuid.uuid1()),
        'session.timeout.ms': 6000,
        'default.topic.config': {'auto.offset.reset': 'latest'},
        'schema.registry.url': args.schema_registry
    }
    consumer = AvroConsumer(conf)
    consumer.subscribe([args.topic])

    while True:
        try:
            msg = consumer.poll(1)
        except SerializerError as e:
            print('Message deserialization failed for {}: {}'.format(msg, e))
            break
        if msg is None:
            continue
        if msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            continue

        msg_consumed_count += 1
        nlp_processing(msg.value())
        if msg_consumed_count >= int(args.msg_count):
            break

    consumer.close()
def consume(conf, schema_record):
    """
    Consume MetadataChangeEvent records
    """
    from confluent_kafka import avro
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.avro.serializer import SerializerError

    print("Consuming MetadataChangeEvent records from topic {} with group {}. ^c to exit."
          .format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=avro.load(schema_record))
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)

            # There were no messages on the queue, continue polling
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = MetadataChangeEvent(msg.value())
            print("avro_event: {}\n\t".format(record.value))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
def testAvroConsumer(topic):
    config = {
        'bootstrap.servers': "localhost:9092",
        'group.id': 'groupid',
        'schema.registry.url': 'http://127.0.0.1:8081'
    }
    c = AvroConsumer(config)
    c.subscribe([topic])

    running = True
    msg = None
    while running:
        try:
            msg = c.poll(10)
            if msg:
                if not msg.error():
                    print(msg.value())
                elif msg.error().code() != KafkaError._PARTITION_EOF:
                    print(msg.error())
                    running = False
        except SerializerError as e:
            print("Message deserialization failed for %s: %s" % (msg, e))
            running = False

    c.close()
class AvroConsumer:
    DEFAULT_CONFIG = {
        'api.version.request': True,
        'client.id': socket.gethostname(),
        'default.topic.config': {'auto.offset.reset': 'latest'},
        'enable.auto.commit': False,
        'fetch.error.backoff.ms': 0,
        'fetch.wait.max.ms': 100,
        'fetch.min.bytes': 1000,
        'log.connection.close': False,
        'log.thread.name': False,
        'session.timeout.ms': 6000,
        'statistics.interval.ms': 15000,
        'queued.max.messages.kbytes': '10485',
        'fetch.message.max.bytes': '10485',
    }

    def __init__(
        self,
        config,
        get_message: Callable = get_message,
        error_handler: Callable = default_error_handler
    ) -> None:
        stop_on_eof = config.pop('stop_on_eof', False)
        poll_timeout = config.pop('poll_timeout', 0.1)
        self.non_blocking = config.pop('non_blocking', False)
        self.config = {**self.DEFAULT_CONFIG, **config}
        self.config['error_cb'] = get_callback(
            config.pop('error_cb', None), default_error_cb
        )
        self.config['stats_cb'] = get_callback(
            config.pop('stats_cb', None), default_stats_cb
        )
        self.topics = self._get_topics(self.config)
        logger.info("Initializing consumer", config=self.config)
        self.consumer = ConfluentAvroConsumer(self.config)
        self.consumer.subscribe(self.topics)
        self._generator = self._message_generator()
        self._get_message = partial(
            get_message,
            consumer=self.consumer,
            error_handler=error_handler,
            timeout=poll_timeout,
            stop_on_eof=stop_on_eof
        )

    def __getattr__(self, name):
        # Delegate unknown attributes to the underlying confluent consumer.
        return getattr(self.consumer, name)

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return next(self._generator)
        except EndOfPartition:
            raise StopIteration

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        # The only reason a consumer exits is when an exception is raised.
        #
        # Close down the consumer cleanly accordingly:
        #  - stop consuming
        #  - commit offsets (only on auto commit)
        #  - leave the consumer group
        logger.info("Closing consumer")
        self.consumer.close()

    def _message_generator(self):
        while True:
            message = self._get_message()
            if message is None:
                if self.non_blocking:
                    yield None
                continue
            statsd.increment(f'{base_metric}.consumer.message.count.total')
            yield Message(message)

    def _get_topics(self, config):
        topics = config.pop('topics', None)
        assert topics is not None, "You must subscribe to at least one topic"
        if not isinstance(topics, list):
            topics = [topics]
        return topics

    @property
    def is_auto_commit(self):
        return self.config.get('enable.auto.commit', True)
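# Usage sketch (assumption, not from the source): the wrapper above is both an
# iterator and a context manager, and pops 'topics' from the config in
# `_get_topics()`. The topic name and `handle` are placeholders.
def consume_forever(config):
    config.setdefault('topics', ['example-topic'])
    with AvroConsumer(config) as consumer:
        for message in consumer:
            if message is None:  # only yielded when non_blocking=True
                continue
            handle(message)  # hypothetical handler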
class Consumer:
    def __init__(self, broker, schema_registry, topic=None,
                 logging_enabled=False, group_id=None, auto_commit=True):
        """
        Initialiser for Confluent Consumer using AvroConsumer.
        Each consumer can only be subscribed to one topic.

        Parameters
        ----------
        broker: str
            The URL of the broker (example: 'localhost:9092')
        schema_registry: str
            The URL of the Confluent Schema Registry endpoint (example: 'http://localhost:8081')
        topic: str
            The topic to subscribe to
        logging_enabled: bool, Optional
            If True, a logger object is created and used to log messages
        group_id: str, Optional
            An optional group ID which can be used to load-balance consumers.
            If omitted, a unique ID is generated from a hash of the current time.
        auto_commit: bool, Optional
            Whether offsets are committed automatically. Default is True.
        """
        if group_id is None:
            new_hash = hashlib.sha1()
            new_hash.update(str(time.time()).encode("utf-8"))
            group_id = new_hash.hexdigest()

        self.__consumer = AvroConsumer({
            "bootstrap.servers": broker,
            "group.id": group_id,
            "schema.registry.url": schema_registry,
            "enable.auto.commit": auto_commit
        })
        # Fallback consumer for messages that are not Avro-encoded
        self.__consumer_non_avro = KafkaConsumer({
            "bootstrap.servers": broker,
            "group.id": group_id + "0",
            "enable.auto.commit": auto_commit
        })
        self.auto_commit = auto_commit
        if not auto_commit:
            self.consumed_messages = PriorityQueue()
        if topic is not None:
            self.subscribe_to_topic(topic)
        else:
            self.topic = None
        if logging_enabled:
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = None

    def consume(self, timeout=1):
        """
        Consume and return a message if one exists and can be deserialized.

        Returns
        -------
        str
            The received message payload as a string
        None
            No message has been received or an error has occurred
        """
        if self.topic is None:
            raise ValueError("Consumer is currently not subscribed to a topic")

        msg = None
        non_avro = False
        try:
            msg = self.__consumer.poll(timeout)
        except SerializerError:
            # The payload is not Avro-encoded; retry with the plain consumer
            try:
                msg = self.__consumer_non_avro.poll(timeout)
                non_avro = True
            except Exception as e:
                self.__log_msg(
                    "Message deserialization has failed {}: {}".format(msg, e),
                    "See the following stack trace",
                    f"{traceback.format_exc()}",
                    delimiter="\n",
                    level="ERROR")
        except RuntimeError:
            self.__log_msg(
                "The consumer has been closed and cannot receive messages",
                level="ERROR")
        except Exception as e:
            self.__log_msg(
                "An unknown error has occurred {}".format(e),
                "See the following stack trace",
                f"{traceback.format_exc()}",
                delimiter="\n",
                level="ERROR")

        if msg is None:
            return None
        if msg.error():
            self.__log_msg("AvroConsumer error: {}".format(msg.error()), level="ERROR")
            return None

        if not self.auto_commit:
            self.consumed_messages.put_nowait(msg)
        if non_avro:
            return json.loads(msg.value().decode())
        return msg.value()

    def __enter__(self):
        return self.__consumer

    def __exit__(self, *args):
        self.close()

    def __log_msg(self, *messages, level="NOTSET", delimiter=" "):
        levels = {
            "CRITICAL": logging.CRITICAL,
            "ERROR": logging.ERROR,
            "WARNING": logging.WARNING,
            "INFO": logging.INFO,
            "DEBUG": logging.DEBUG,
            "NOTSET": logging.NOTSET
        }
        msg = delimiter.join(messages)
        if self.logger is not None:
            if level not in levels:
                raise ValueError(
                    f"level {level} is not valid, must be one of {list(levels.keys())}")
            self.logger.log(levels[level], msg)
        else:
            print(f"{level}: {msg}")

    def commit(self, asynchronous=True):
        if not self.auto_commit and not self.consumed_messages.empty():
            msg = self.consumed_messages.get_nowait()
            self.__consumer.commit(msg, asynchronous=asynchronous)

    def list_topics(self, topic=None, timeout=1):
        try:
            metadata = self.__consumer.list_topics(topic, timeout)
            topics = metadata.topics
            return list(topics.keys())
        except Exception as e:
            self.__log_msg(
                f"An unknown error has occurred when trying to list topics: {e}",
                level="ERROR")

    def check_if_topic_exists(self, topic, timeout=1):
        topic_list = self.list_topics(timeout=timeout)
        if topic_list is not None:
            return topic in topic_list

    def subscribe_to_topic(self, topic):
        try:
            self.__consumer_non_avro.subscribe([topic], on_assign=self.__assign)
            self.__consumer.subscribe([topic], on_assign=self.__assign)
            self.topic = topic
            return True
        except Exception as e:
            self.__log_msg(
                "An unknown error {}".format(e),
                "occurred while trying to subscribe to topic {}".format(topic),
                delimiter=" ",
                level="ERROR")
            return False

    def __assign(self, consumer, partitions):
        # Start each assigned partition at its latest message
        for p in partitions:
            p.offset = consumer.get_watermark_offsets(p)[1] - 1
        self.__consumer.assign(partitions)
        self.__consumer_non_avro.assign(partitions)

    def close(self):
        """
        Close the consumer. Once called, this object cannot be reused.
        """
        self.__consumer.close()
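# Usage sketch (assumption, not from the source): a manual-commit flow with the
# Consumer wrapper above; broker and registry addresses are placeholders and
# `process` is a hypothetical handler.
consumer = Consumer("localhost:9092", "http://localhost:8081",
                    topic="example-topic", auto_commit=False)
try:
    while True:
        payload = consumer.consume(timeout=1)
        if payload is None:
            continue
        process(payload)   # hypothetical handler
        consumer.commit()  # commits the oldest uncommitted message
finally:
    consumer.close()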
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
import time
from concurrent.futures import ThreadPoolExecutor, wait
import boto3
from config import conf
from datetime import datetime

KAFKA_BROKER_URL = conf.KAFKA_CONFIG["bootstrap.servers"]
SCHEMA_REGISTRY_URL = conf.KAFKA_CONFIG["schema.registry.url"]

c = AvroConsumer({
    'bootstrap.servers': KAFKA_BROKER_URL,
    'group.id': 'testGrp_2',
    'schema.registry.url': SCHEMA_REGISTRY_URL
})
c.subscribe(['userPratilipiEventAggregated'])

pool = ThreadPoolExecutor(20)
dynamodb = boto3.resource('dynamodb', region_name=conf.AWS_REGION)
table = dynamodb.Table('user_pratilipi')


def pushEventToDB(msg):
    data = msg["data"]
    pratilipi_id = msg["pratilipiId"]
    date = datetime.fromtimestamp(data["readTime"] /
class KafkaConsumer:
    def __init__(self, kafka_brokers="", scram_username="", scram_password="",
                 topic_name="", schema_registry_url="", autocommit=True):
        self.kafka_brokers = kafka_brokers
        self.scram_username = scram_username
        self.scram_password = scram_password
        self.topic_name = topic_name
        self.schema_registry_url = schema_registry_url
        self.kafka_auto_commit = autocommit

    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    def prepareConsumer(self, groupID="pythonconsumers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': self.schema_registry_url,
            'enable.auto.commit': self.kafka_auto_commit,
            'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'SCRAM-SHA-512',
            'sasl.username': self.scram_username,
            'sasl.password': self.scram_password,
            'ssl.ca.location': os.environ['PEM_CERT'],
            'schema.registry.ssl.ca.location': os.environ['PEM_CERT']
        }
        # Print the configuration
        print("--- This is the configuration for the Avro consumer: ---")
        print(options)
        print("---------------------------------------------------")
        # Create the Avro consumer
        self.consumer = AvroConsumer(options)
        # Subscribe to the topic
        self.consumer.subscribe([self.topic_name])

    def traceResponse(self, msg):
        print('[Message] - Next message consumed from {} partition: [{}] at offset {} with key {} and value {}'
              .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    # Polls for the next event
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=10.0)
        # Validate the returned message
        if msg is None:
            print("[INFO] - No new messages on the topic")
        elif msg.error():
            if "PARTITION_EOF" in str(msg.error()):
                print("[INFO] - End of partition")
            else:
                print("[ERROR] - Consumer error: {}".format(msg.error()))
        else:
            # Print the message
            self.traceResponse(msg)

    def close(self):
        self.consumer.close()
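# Usage sketch (assumption, not from the source): credentials and addresses are
# placeholders; PEM_CERT must point at the broker's CA certificate before
# prepareConsumer() is called.
consumer = KafkaConsumer(
    kafka_brokers="broker:9093",
    scram_username="user",
    scram_password="secret",
    topic_name="example-topic",
    schema_registry_url="https://registry:8081",
)
consumer.prepareConsumer(groupID="example-group")
consumer.pollNextEvent()
consumer.close()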
from pprint import pprint

from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

# get saved keys
import generalconfig as cfg

confluentKey = cfg.pwd['confluentKey']
confluentSecret = cfg.pwd['confluentSecret']
confluentSchemaRegistryKey = cfg.pwd['confluentSchemaRegistryKey']
confluentSchemaRegistrySecret = cfg.pwd['confluentSchemaRegistrySecret']

c = AvroConsumer({
    'bootstrap.servers': "pkc-41973.westus2.azure.confluent.cloud:9092",
    'security.protocol': 'SASL_SSL',
    'sasl.mechanism': 'PLAIN',
    'sasl.username': confluentKey,
    'sasl.password': confluentSecret,
    'schema.registry.url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud',
    'schema.registry.basic.auth.credentials.source': 'USER_INFO',
    'schema.registry.basic.auth.user.info': f'{confluentSchemaRegistryKey}:{confluentSchemaRegistrySecret}',
    'group.id': '1',
})
c.subscribe(['covid'])

for i in range(10):
    try:
        msg = c.poll(timeout=20)
    except SerializerError as e:
        print("Message deserialization failed, skipping bad message.")
        continue
import json

import requests
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError


def dump(obj):
    for attr in dir(obj):
        print("obj.%s = %r" % (attr, getattr(obj, attr)))


def get_schema_id(topic):
    response = requests.get(
        'http://35.223.91.93:8081/subjects/{}/versions/latest'.format(topic))
    return json.loads(response.text)


c = AvroConsumer({
    'bootstrap.servers': '10.128.0.8,10.128.0.9,10.128.0.10',
    'schema.registry.url': 'http://35.223.91.93:8081',
    'group.id': 'groupid'
})
c.subscribe(['redshift'])

while True:
    try:
        msg = c.poll(1)
    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue
#!/usr/bin/env python
from confluent_kafka import avro
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

conf = {
    "bootstrap.servers": "10.227.52.244:31090,10.227.52.244:31091,10.227.52.244:31092",
    "schema.registry.url": "http://10.227.52.244:30553",
    "group.id": "testeiei"
}

topic = "ingester"
key_schema = avro.load("./schemas/{}-key.avsc".format(topic))
value_schema = avro.load("./schemas/{}-value.avsc".format(topic))

# c = AvroConsumer(conf, reader_key_schema=key_schema, reader_value_schema=value_schema)
c = AvroConsumer(conf)
c.subscribe([topic])

while True:
    try:
        msg = c.poll(1)

        # There were no messages on the queue, continue polling
        if msg is None:
            print(".")
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue

        print(msg.key(), msg.value())
    except SerializerError as e:
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka import OFFSET_BEGINNING
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
es.indices.create(index='prgs1min', ignore=400)

c = AvroConsumer({
    'default.topic.config': {'auto.offset.reset': 'earliest'},
    'bootstrap.servers': 'ip address:9092',
    'group.id': 'test-consumer-group',
    'schema.registry.url': 'http://localhost:8081'
})


def my_assign(consumer, partitions):
    # Rewind every assigned partition to the beginning of the log
    for p in partitions:
        p.offset = OFFSET_BEGINNING
    print('assign', partitions)
    consumer.assign(partitions)


c.subscribe(['PRGS1MIN'], on_assign=my_assign)

running = True
while running:
    try:
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

c = AvroConsumer({
    'bootstrap.servers': 'kafka1:9092',
    'auto.offset.reset': 'earliest',
    'group.id': 'local',
    'schema.registry.url': 'http://kafka-schema-registry:8081'
})
c.subscribe(['my_topic2'])

running = True
while running:
    try:
        msg = c.poll(10)
        if msg:
            if not msg.error():
                print(msg.value())
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                print(msg.error())
                running = False
    except SerializerError as e:
        print("Message deserialization failed for %s: %s" % (msg, e))
        running = False

c.close()
class KafkaWorker(BaseWorker):
    topic_name = None
    consumer_name = None
    consumer_settings = {}
    commit_on_complete = False
    async_commit = True
    poll_timeout = 0
    auto_offset_reset = 'earliest'
    consumer = None
    last_message = None

    def setup(self):
        self.consumer = AvroConsumer(self.get_consumer_settings())
        self.consumer.subscribe([self.get_topic_name()])

    def teardown(self):
        if self.consumer:
            self.consumer.close()

    def get_topic_name(self):
        return self.topic_name or utils.config_missing('topic name')

    def get_consumer_name(self):
        return self.consumer_name or utils.generate_random_consumer_name()

    def get_consumer_settings(self):
        default_settings = {
            'group.id': self.get_consumer_name(),
            'default.topic.config': {'auto.offset.reset': self.auto_offset_reset},
            'enable.auto.commit': False,
            'bootstrap.servers': utils.get_broker_url(),
            'schema.registry.url': utils.get_schema_registry_url(),
            'session.timeout.ms': 10000,
            'heartbeat.interval.ms': 1000,
            'api.version.request': True,
        }
        return utils.generate_client_settings(default_settings, self.consumer_settings)

    def poll(self):
        message = self.consumer.poll(timeout=self.poll_timeout)
        if message is not None:
            self.last_message = message
        return message

    def get_partitions(self):
        partitions = self.consumer.assignment()
        if not partitions:
            self.poll()
            partitions = self.consumer.assignment()
        return partitions

    def get_current_offsets(self):
        return self.consumer.position(self.get_partitions())

    def reset_consumer_offsets(self, offset):
        self.consumer.assign([TopicPartition(tp.topic, tp.partition, offset)
                              for tp in self.get_partitions()])

    def seek_to_timestamp(self, timestamp):
        timestamp_ms = dt_to_unix_ms(timestamp)
        partitions = self.get_partitions()
        for tp in partitions:
            tp.offset = timestamp_ms
        partitions = self.consumer.offsets_for_times(partitions)
        self.consumer.assign(partitions)

    def handle(self):
        message = self.poll()
        if message is None:
            self.wait()
        elif message.error():
            if message.error().code() == KafkaError._PARTITION_EOF:
                self.partition_eof(message)
            else:
                raise KafkaException(message.error())
        else:
            self._consume(message)
            if self.commit_on_complete:
                self.commit()
            self.done()

    def commit(self):
        if not self.consumer_settings.get('enable.auto.commit'):
            # `async` is a reserved word in Python 3; confluent-kafka's commit()
            # takes `asynchronous` instead.
            self.consumer.commit(asynchronous=self.async_commit)

    def _consume(self, message):
        self.consume_message(MessageValue(message))

    def consume_message(self, message):
        pass

    def partition_eof(self, message):
        pass
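# Usage sketch (assumption, not from the source): a minimal worker subclass.
# How `handle()` gets driven is framework-specific (BaseWorker is not shown here).
class OrderWorker(KafkaWorker):
    topic_name = 'orders'
    consumer_name = 'order-worker'
    commit_on_complete = True

    def consume_message(self, message):
        print('got order:', message)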
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
import json, ast

c = AvroConsumer({
    'bootstrap.servers': "temple.di.uoa.gr:9092",
    'group.id': 'groupid',
    'schema.registry.url': "http://temple.di.uoa.gr:8081",
    'auto.offset.reset': 'latest'
})
c.subscribe(["FusionAlert"])

while True:
    try:
        msg = c.poll(10)
    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue

    if msg.error():
        if msg.error().code() == KafkaError._PARTITION_EOF:
            continue
        else:
            print(msg.error())
def __init__(self, brokers, group, schema_registry_url):
    self.avro_consumer = AvroConsumer({
        'bootstrap.servers': brokers,
        'group.id': group,
        'auto.offset.reset': 'earliest',
        'schema.registry.url': schema_registry_url})
def create_consumer(
    self,
    group_id=None,
    server="127.0.0.1",
    port="9092",
    enable_auto_commit=True,
    auto_offset_reset="latest",
    schema_registry_url=None,
    auto_create_topics=True,
    key_deserializer=None,
    value_deserializer=None,
    legacy=True,
    **kwargs
):
    """Create Kafka Consumer and return its `group_id` as a string.

    Keyword Arguments:
    - ``server`` (str): IP address / domain that the consumer should contact
      to bootstrap initial cluster metadata. Default: `127.0.0.1`.
    - ``port`` (int): Port number. Default: `9092`.
    - ``group_id`` (str or uuid.uuid4() if not set): name of the consumer group
      to join for dynamic partition assignment (if enabled), and to use for
      fetching and committing offsets. If None, a unique string is generated
      (via uuid.uuid4()) and offset commits are disabled. Default: `None`.
    - ``auto_offset_reset`` (str): A policy for resetting offsets on
      OffsetOutOfRange errors: `earliest` will move to the oldest available
      message, `latest` will move to the most recent. Any other value will
      raise an exception. Default: `latest`.
    - ``enable_auto_commit`` (bool): If true the consumer's offset will be
      periodically committed in the background. Default: `True`.
    - ``schema_registry_url`` (str): *required* for Avro Consumer. Full URL
      to the avro schema endpoint.
    - ``auto_create_topics`` (bool): Consumers no longer trigger auto creation
      of topics; will be removed in a future release. Default: `True`.
    - ``legacy`` (bool): Activate DeserializingConsumer if `False`, else
      AvroConsumer (legacy) is used. Will be removed when confluent-kafka
      deprecates the legacy client. Default: `True`.

    Note: Configuration parameters are described in more detail at
    https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    """
    if group_id is None:
        group_id = str(uuid.uuid4())

    if schema_registry_url and legacy:
        consumer = AvroConsumer({
            'bootstrap.servers': '{}:{}'.format(server, port),
            'group.id': group_id,
            'enable.auto.commit': enable_auto_commit,
            'allow.auto.create.topics': auto_create_topics,
            'auto.offset.reset': auto_offset_reset,
            'schema.registry.url': schema_registry_url,
            **kwargs})
    elif not legacy:
        consumer = DeserializingConsumer({
            'bootstrap.servers': '{}:{}'.format(server, port),
            'group.id': group_id,
            'enable.auto.commit': enable_auto_commit,
            'auto.offset.reset': auto_offset_reset,
            'key.deserializer': key_deserializer,
            'value.deserializer': value_deserializer,
            **kwargs})
    else:
        consumer = Consumer({
            'bootstrap.servers': '{}:{}'.format(server, port),
            'group.id': group_id,
            'enable.auto.commit': enable_auto_commit,
            'allow.auto.create.topics': auto_create_topics,
            'auto.offset.reset': auto_offset_reset,
            **kwargs})

    self.consumers[group_id] = consumer
    return group_id
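# Usage sketch (assumption, not from the source): create an Avro consumer and
# look it up again via the returned group id; `lib` stands in for whatever
# object exposes `create_consumer` and the `consumers` mapping.
group_id = lib.create_consumer(
    server="127.0.0.1",
    port="9092",
    schema_registry_url="http://127.0.0.1:8081",
    auto_offset_reset="earliest",
)
consumer = lib.consumers[group_id]
consumer.subscribe(['example-topic'])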
def run_consumer(group_id: str, broker_url: str, registry_url: str,
                 subscription_name: str, apiestas_url: str):
    logger.info(
        f"Kafka consumer listening to {broker_url} and subscribed to {subscription_name}"
    )
    consumer = AvroConsumer({
        'group.id': group_id,
        'bootstrap.servers': broker_url,
        'schema.registry.url': registry_url
    })
    consumer.subscribe([subscription_name])

    while True:
        try:
            msg = consumer.poll(10)
        except SerializerError as e:
            logger.error("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            continue
        if msg.error():
            logger.error("AvroConsumer error: {}".format(msg.error()))
            continue

        # Find surebets
        doc = json.loads(msg.value())
        parse_mongo_dates(doc)
        try:
            match = MatchInDB(**doc)
        except ValidationError as e:
            logger.error(f"Error generating model from Kafka event. Event: {msg.value()}")
            continue

        # We do not process surebets that have been created less than one minute ago
        if not (match.surebets
                and datetime.utcnow() - min(surebet.created_at
                                            for surebet in match.surebets) < timedelta(minutes=1)):
            surebets = SureBetsFinder(match.bets).find_all()
            if surebets:
                logger.info(f"{len(surebets)} surebets found for match '{match.slug}'")
                data = json.dumps(list(map(lambda x: x.dict(), surebets)))
                try:
                    response = requests.post(
                        f"{apiestas_url}/api/matches/{match.slug}/surebets", data=data)
                    if response.status_code != 200:
                        logger.error(f"There was an error submitting the surebet. Data: {data}")
                except Exception:
                    logger.error(f"There was an error submitting the surebet. Data: {data}",
                                 exc_info=True)
            else:
                logger.info(f"No surebets found for match '{match.slug}'")

    consumer.close()


def check_surebets_recency(match: MatchInDB):
    return min(surebet.created_at for surebet in match.surebets)
zookeeperServer = hostname
zookeeperPort = 2185
zookeeper = zookeeperServer + ":" + str(zookeeperPort)

schemaRegistryServer = hostname
schemaRegistryPort = 8081

topic = 'my_topic'
SCHEMA_REGISTRY_URL = 'http://' + schemaRegistryServer + ':' + str(schemaRegistryPort)

print("""\nINFO: Kafka Connection Details:
    Kafka Broker : %s
    Zookeeper    : %s
    Topic        : %s
""" % (kafkaBroker, zookeeper, topic))

conf = {
    'bootstrap.servers': kafkaBroker,
    'group.id': 'mygroup',
    'schema.registry.url': SCHEMA_REGISTRY_URL
}

print("\nINFO: Create client obj for Kafka connection")
avroConsume_client = AvroConsumer(conf)
consume_messages(avroConsume_client, topic=topic)
"name": "Student", "fields": [ {"name": "first_name", "type": ["null", "string"], "default": null, "doc": "First name of the student"}, {"name": "last_name", "type": ["null", "string"], "default": null, "doc": "Last name of the student"}, {"name": "class", "type": "int", "default": 1, "doc": "Class of the student"} ] } """ value_schema = avro.loads(value_schema_str) c = AvroConsumer( { 'bootstrap.servers': 'peter-kafka01.foo.bar,peter-kafka02.foo.bar,peter-kafka03.foo.bar', 'group.id': 'python-groupid02', 'auto.offset.reset': 'earliest', 'schema.registry.url': 'http://peter-kafka03.foo.bar:8081' }, reader_value_schema=value_schema) c.subscribe(['peter-avro2']) while True: try: msg = c.poll(10) except SerializerError as e: print("Message deserialization failed for {}: {}".format(msg, e)) break
from confluent_kafka.avro import AvroConsumer
import phonenumbers
from phonenumbers.phonenumberutil import region_code_for_number
import pandas as pd
import pycountry

c = AvroConsumer({
    'bootstrap.servers': '10.254.34.155:9092',
    'group.id': 'test-grupa',
    'schema.registry.url': 'http://10.254.34.155:8081',
    'default.topic.config': {'auto.offset.reset': 'smallest'}
})
c.subscribe(['tel_buildset_fer'])


def translated():
    return True


def get_df():
    while True:
        try:
            first_msg = c.poll(10)
            mapa = first_msg.value()
            print(mapa)
            mapa['CODE'] = region_code_for_number(
                phonenumbers.parse('+' + str(int(mapa['CALLEE']))))
            callee_country = pycountry.countries.get(
                alpha_2=region_code_for_number(
if __name__ == '__main__':
    default_group_name = "default-consumer-group"

    # Push messages to Transactions Topic
    # producer = AvroProducer(bootstrap_servers=KAFKA_BROKER_URL,
    #                         value_serializer=lambda value: json.dumps(value).encode())

    consumer_config = {
        "bootstrap.servers": KAFKA_BROKER_URL,
        "schema.registry.url": SCHEMA_REGISTRY_URL,
        "group.id": default_group_name,
        "auto.offset.reset": "earliest"
    }
    consumer = AvroConsumer(consumer_config)

    # Consumer: Read from Transactions Topic
    print("Created Consumer")
    consumer.subscribe([TRANSACTIONS_TOPIC])
    print(f"Consumer subscribed to {TRANSACTIONS_TOPIC}")

    raw_messages = []
    # As we read messages pushed from producer to the consumer - classify:
    while True:
        try:
            message = consumer.poll(5)
            print(f"Polled for message: {message}")
        except SerializerError as e:
            # print(f"Exception while trying to poll messages: {e}")
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

c = AvroConsumer({
    'bootstrap.servers': 'localhost:32772,localhost:32773,localhost:32774',
    'group.id': 'avro-consumer',
    'schema.registry.url': 'http://localhost:8081',
    'auto.offset.reset': 'earliest'
})
c.subscribe(['my_topic'])

while True:
    try:
        msg = c.poll(1.0)
    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue
    if msg.error():
        print("AvroConsumer error: {}".format(msg.error()))
        continue

    print(msg.value())

c.close()
from confluent_kafka import KafkaError, avro
from confluent_kafka.avro import AvroConsumer, AvroProducer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka import TopicPartition
from math import ceil
from calculation import calculate_average, calculate_ranking
from schema import key_schema_avg_str, key_schema_rank_str, value_schema_avg_str, value_schema_rank_str
import datetime

c = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'join-ksql',
    'schema.registry.url': 'http://0.0.0.0:8081'
})
# c.assign([Partition])
c.subscribe(['students_result_source'])

value_schema_avg = avro.loads(value_schema_avg_str)
key_schema_avg = avro.loads(key_schema_avg_str)
value_schema_rank = avro.loads(value_schema_rank_str)
key_schema_rank = avro.loads(key_schema_rank_str)

producer_avg = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://0.0.0.0:8081'
}, default_key_schema=key_schema_avg, default_value_schema=value_schema_avg)

producer_rank = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

c = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'groupid2',
    'schema.registry.url': 'http://127.0.0.1:8081',
    'default.topic.config': {'auto.offset.reset': 'smallest'}
})
c.subscribe(['passenger2'])

while True:
    try:
        msg = c.poll(1)
    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        print("no message")
        continue

    if msg.error():
        print("AvroConsumer error: {}".format(msg.error()))
def avro_consumer(urls, topics, uav_name):
    c = AvroConsumer(urls)
    c.subscribe(topics)

    check_time = 0
    msges = []
    c_topic = ""
    loop = len(topics)

    while True:
        try:
            msg = c.poll(10)
        except SerializerError as e:
            # print("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            continue
        if msg.error():
            # print("AvroConsumer error: {}".format(msg.error()))
            continue

        m = msg.value()
        if m["header"]['sourceSystem'] == uav_name:
            # Keep only messages that share the first matching timestamp
            if check_time == 0 or check_time == m["header"]["time"]:
                c.unsubscribe()
                check_time = m["header"]["time"]
                c_topic = msg.topic()
                d = topics.index(c_topic)
                del topics[d]
                msges.append(msg)
                loop = loop - 1
                if loop == 0:
                    break
                c.subscribe(topics)

    c.close()
    # Return the list of consumed Avro messages (one for each topic, same timestamp)
    return msges
class KafkaConsumer:
    def __init__(self, kafka_env='LOCAL', kafka_brokers="", kafka_apikey="",
                 topic_name="", schema_registry_url="", autocommit=True):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_apikey = kafka_apikey
        self.topic_name = topic_name
        self.schema_registry_url = schema_registry_url
        self.kafka_auto_commit = autocommit

    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    def prepareConsumer(self, groupID="pythonconsumers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': self.schema_registry_url,
            'enable.auto.commit': self.kafka_auto_commit,
        }
        if self.kafka_env != 'LOCAL' and self.kafka_env != 'MINIKUBE':
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = '******'
            options['sasl.password'] = self.kafka_apikey
        if self.kafka_env == 'OCP':
            options['ssl.ca.location'] = os.environ['PEM_CERT']
            options['schema.registry.ssl.ca.location'] = os.environ['PEM_CERT']
        print("This is the configuration for the consumer:")
        print(options)
        self.consumer = AvroConsumer(options)
        self.consumer.subscribe([self.topic_name])

    def traceResponse(self, msg):
        print('@@@ pollNextOrder {} partition: [{}] at offset {} with key {}:\n\tvalue: {}'
              .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    def pollNextEvent(self, keyID, keyname):
        gotIt = False
        while not gotIt:
            try:
                msg = self.consumer.poll(timeout=10.0)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(msg, e))
                break
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                if "PARTITION_EOF" in str(msg.error()):
                    gotIt = True
                continue
            self.traceResponse(msg)
            if msg.key()[keyname] == keyID:
                gotIt = True

    def close(self):
        self.consumer.close()