def _make_consumer(self) -> DeserializingConsumer:
    schema_registry_client = SchemaRegistryClient(
        {"url": self.config["schema_registry"]})
    key_deserializer = AvroDeserializer(schema_registry_client)
    value_deserializer = AvroDeserializer(schema_registry_client)

    config = {
        "bootstrap.servers": ",".join(self.config["bootstrap_servers"]),
        "key.deserializer": key_deserializer,
        "value.deserializer": value_deserializer,
        "enable.auto.commit": False,
        "enable.partition.eof": True,
        "group.id": self.config["group_id"],
        "default.topic.config": {"auto.offset.reset": "earliest"},
        **self.config["kafka_opts"],
    }

    hash_sensitive_values = self.config["hash_sensitive_values"]
    consumer = DeserializingConsumer(config)
    hidden_config = hide_sensitive_values(
        config, hash_sensitive_values=hash_sensitive_values)
    logger.info(
        f"AvroConsumer created with config: {pformat(hidden_config, indent=2)}")
    # noinspection PyArgumentList
    consumer.subscribe(self.config["topics"],
                       on_assign=self._on_assign,
                       on_revoke=self._on_revoke)
    return consumer
def __init__(self, consumer=None):
    super().__init__()
    self.logger = logging.getLogger(__package__)
    self.logger.debug("Initializing the consumer")
    self.consumer = consumer
    self.message_handler = HandleMessage()
    self._stop_event = threading.Event()
    # Track whether there is currently a message being processed. Just a raw
    # bool is OK because the subscription is configured to prefetch 1
    # message at a time - i.e. this function should NOT run in parallel
    self._processing = False
    while self.consumer is None:
        try:
            self.logger.debug("Getting the kafka consumer")
            config = kafka_config_from_env()
            config['key.deserializer'] = StringDeserializer('utf_8')
            config['value.deserializer'] = StringDeserializer('utf_8')
            config['on_commit'] = self.on_commit
            config['group.id'] = GROUP_ID
            config['auto.offset.reset'] = 'earliest'
            self.consumer = DeserializingConsumer(config)
        except KafkaException as err:
            self.logger.error("Could not initialize the consumer: %s", err)
            raise ConnectionException(
                "Could not initialize the consumer") from err
    self.consumer.subscribe([TRANSACTIONS_TOPIC])
def __init__(self, consumer_topic, producer_topic, client_id,
             bootstrap_servers, consumer_proto_class, producer_proto_class,
             processor, max_thread_calls):
    self.consumer_topic = consumer_topic
    self.producer_topic = producer_topic
    self.client_id = client_id
    self.bootstrap_servers = bootstrap_servers
    self.consumer_proto_class = consumer_proto_class
    self.producer_proto_class = producer_proto_class
    self.processor = processor
    self.max_thread_calls = max_thread_calls

    self.kafka_consumer = DeserializingConsumer({
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.client_id,
        'auto.offset.reset': "earliest",
        # Fixed typo: was `self.derializer`; mirrors the producer's
        # `self.serialize` below.
        'value.deserializer': self.deserialize
    })
    self.kafka_consumer.subscribe([self.consumer_topic])

    self.kafka_producer = SerializingProducer({
        'bootstrap.servers': self.bootstrap_servers,
        'queue.buffering.max.messages': 500000,
        'value.serializer': self.serialize
    })

    self.thread_queue = deque(maxlen=self.max_thread_calls)
    self.latest_thread_queue_id = 1
def receive():
    json_deserializer = JSONDeserializer(USER_SCHEMA,
                                         from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': 'localhost:9092',
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': 'django-kafka',
        'auto.offset.reset': "earliest"
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([USER_TOPIC])

    """
    The idea is to start the Kafka consumer when a message is sent to the
    Kafka producer, resulting in two queues: a task queue and a
    message/content queue. Multi-threading might be overkill for a simple
    application, hence the for loop (temporary).
    """
    for x in range(200):
        try:
            msg = consumer.poll(timeout=5.0)
            if msg is not None:
                user = msg.value()
                if user is not None:
                    print("User record {}: username: {}\n"
                          "\tdata: {}\n".format(msg.key(),
                                                user.username,
                                                user.data))
        except Exception as e:
            print('An exception occurred: {}'.format(e))
            logging.error(traceback.format_exc())
def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": ["name", "favorite_number", "favorite_color"]
    }
    """
    json_deserializer = JSONDeserializer(schema_str,
                                         from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                # Arguments reordered: the original swapped favorite_number
                # and favorite_color.
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_number,
                                                      user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
def set_offsets_to_time(start_from_seconds_ago: int,
                        consumer: confluent_kafka.DeserializingConsumer,
                        partitions: List[confluent_kafka.TopicPartition]) -> None:
    start_from = datetime.now(timezone.utc) - timedelta(seconds=start_from_seconds_ago)
    logger.info('Setting consumer offsets to start from %s', start_from)
    for p in partitions:
        p.offset = int(start_from.timestamp() * 1000)  # yep, it's a weird API
    consumer.assign(partitions)
    for p in consumer.offsets_for_times(partitions):
        logger.debug('Topic %s partition %s SEEKing to offset %s',
                     p.topic, p.partition, p.offset)
        consumer.seek(p)
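# A minimal sketch of driving set_offsets_to_time, assuming a locally running
# broker; the topic name 'events', the partition count, and the group id are
# illustrative placeholders. In a real consumer the partition list usually
# arrives via an on_assign callback rather than being built by hand.
from confluent_kafka import DeserializingConsumer, TopicPartition

consumer = DeserializingConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'replay-group',
})
# Rewind three partitions of 'events' to one hour ago.
partitions = [TopicPartition('events', p) for p in range(3)]
set_offsets_to_time(3600, consumer, partitions)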
def _init_consumer(topics: List[str], config: Dict) -> Consumer:
    """config must contain `bootstrap.servers` and `group.id`,
    but may contain every other kafka setting as well.
    """
    assert "bootstrap.servers" in config.keys()
    assert "group.id" in config.keys()

    consumer = DeserializingConsumer(config)
    consumer.subscribe(topics)
    return consumer
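# Hypothetical call of _init_consumer above; the broker address, group id,
# topic, and deserializers are placeholders, not values from the source.
from confluent_kafka.serialization import StringDeserializer

consumer = _init_consumer(
    topics=['my-topic'],
    config={
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'my-group',
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': StringDeserializer('utf_8'),
    })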
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_registry_client,
                                         schema_str,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                # Arguments reordered: the original swapped favorite_number
                # and favorite_color.
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_number,
                                                      user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
def consumer(self, conf=None, key_deserializer=None, value_deserializer=None):
    """
    Returns a consumer bound to this cluster.

    Args:
        conf (dict): Consumer config overrides
        key_deserializer (Deserializer): deserializer to apply to message key
        value_deserializer (Deserializer): deserializer to apply to message value

    Returns:
        Consumer: A new DeserializingConsumer instance
    """
    consumer_conf = self.client_conf({
        'group.id': str(uuid1()),
        'auto.offset.reset': 'earliest'
    })

    if conf is not None:
        consumer_conf.update(conf)

    if key_deserializer is not None:
        consumer_conf['key.deserializer'] = key_deserializer

    if value_deserializer is not None:
        consumer_conf['value.deserializer'] = value_deserializer

    return DeserializingConsumer(consumer_conf)
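# Sketch of using the consumer() factory above, assuming `cluster` is an
# instance of the enclosing class and that the topic 'test-topic' exists;
# the override shown is illustrative.
from confluent_kafka.serialization import StringDeserializer

consumer = cluster.consumer(
    conf={'enable.auto.commit': False},
    value_deserializer=StringDeserializer('utf_8'))
consumer.subscribe(['test-topic'])
msg = consumer.poll(1.0)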
def _make_offset_consumer(self) -> DeserializingConsumer:
    """
    Creates the underlying instance of :class:`confluent_kafka.avro.AvroConsumer`
    which is used to fetch the last committed producer offsets.
    """
    key_deserializer = AvroDeserializer(self.schema_registry_client)
    value_deserializer = AvroDeserializer(self.schema_registry_client)

    config = {
        "bootstrap.servers": self.config["bootstrap.servers"],
        "key.deserializer": key_deserializer,
        "value.deserializer": value_deserializer,
        "enable.partition.eof": True,
        "group.id": f'{self.config["offset_topic"]}_fetcher',
        "default.topic.config": {"auto.offset.reset": "latest"},
        **self.config["kafka_opts"],
        **self.config["kafka_consumer_opts"],
    }

    offset_consumer = DeserializingConsumer(config)
    logger.info(
        f"Offset Consumer created with config: {pformat(config, indent=2)}")
    return offset_consumer
def kafpubsub(args):
    publisher = pubsub.PublisherClient()
    project_id = args.project
    kafka_topic = args.topic
    pubsub_topic = f'projects/{project_id}/topics/{kafka_topic}'

    try:
        publisher.create_topic(pubsub_topic)
    except AlreadyExists:
        pass  # I don't need an error if topic already created.

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_server,
        'group.id': args.group_id,
        'auto.offset.reset': args.auto_offset_reset
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([kafka_topic])

    logging.info(f'Publish Kafka ({args.bootstrap_server}) values to pubsub...')
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            logging.debug(f'> {datetime.today()} | {msg.key()}\n')
            publisher.publish(pubsub_topic, msg.value())
        except KeyboardInterrupt:
            break

    consumer.close()
def run_consumer(shutdown_flag, clients, lock):
    print("Starting Kafka Consumer.")
    schema_registry_client = SchemaRegistryClient({"url": "http://localhost:8081"})
    deserializer = AvroDeserializer(schema_registry_client)

    config = {
        "bootstrap.servers": "localhost:9092",
        "group.id": "dashboard-demo",
        "value.deserializer": deserializer
    }

    consumer = DeserializingConsumer(config)
    consumer.subscribe(["DASHBOARD"])

    while not shutdown_flag.done():
        msg = consumer.poll(0.2)
        if msg is None:
            print("Waiting...")
        elif msg.error():
            print(f"ERROR: {msg.error()}")
        else:
            value = msg.value()
            formatted = simplejson.dumps(value)
            print(f"Sending {formatted} to {clients}")
            with lock:
                websockets.broadcast(clients, formatted)

    print("Closing Kafka Consumer")
    consumer.close()
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                # Arguments reordered: the original swapped favorite_number
                # and favorite_color.
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
def _consume(self, on_consume):
    if isinstance(on_consume, types.FunctionType):
        callback = on_consume
    else:
        callback_cls = on_consume()
        callback = callback_cls.on_message

    consumer = DeserializingConsumer(self.kafka_config)
    consumer.subscribe([self.topic])
    q = Queue(maxsize=self.num_threads)
    msg = None

    while True:
        try:
            # Check if we should rate limit
            msg = consumer.poll(1)
            if msg is None:
                continue
            if msg.error():
                logger.error(f'Worker for topic {self.topic} error: {msg.error()}')
                continue
            q.put(msg)
            t = threading.Thread(
                target=_process_msg,
                args=(q, consumer, callback, self.topic),
            )
            t.start()
        except Exception as err:
            logger.error(f'Worker for topic {self.topic} terminated: {err}')
            logger.error(msg)
            consumer.close()
            break
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                # Fields reordered: the original printed favorite_color under
                # the favorite_number label and vice versa.
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_number}\n"
                      f"\tfavorite_color: {user.favorite_color}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
def consume(self, count: int):
    consumer = DeserializingConsumer({
        'bootstrap.servers': self.brokers,
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': self._make_deserializer(),
        'group.id': self.group,
        'auto.offset.reset': "earliest"
    })
    consumer.subscribe([self.topic])

    self.logger.info("Consuming %d %s records from topic %s with group %s",
                     count, self.schema_type.name, self.topic, self.group)
    while self.consumed < count:
        msg = consumer.poll(1)
        if msg is None:
            continue
        payload = msg.value()
        self.logger.debug("Consumed %d at %d", payload.val, msg.offset())
        assert payload.val == self.consumed
        self.consumed += 1

    consumer.close()
def main():
    string_deserializer = StringDeserializer('utf_8')
    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }
    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            # poll() takes its timeout in seconds; the original passed 1000,
            # which would block for roughly 17 minutes per call.
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    messages = sorted(messages,
                                      key=lambda x: x.price,
                                      reverse=True)[0:10]
                    print(messages)
                consumer.commit(asynchronous=False)
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
def create_consumer(self, registry_client):
    """
    Subscribes to the topic defined in the configs and creates a consumer
    to deserialize messages from that topic.

    :param registry_client: SchemaRegistryClient object, get this from
        register_client()

    :return: DeserializingConsumer object
    """
    metadata_schema = None
    topic = None
    if self.metadata_type == "COLLECTION":
        metadata_schema = registry_client.get_latest_version(
            self.collection_topic + '-value').schema.schema_str
        topic = self.collection_topic

    if self.metadata_type == "GRANULE":
        metadata_schema = registry_client.get_latest_version(
            self.granule_topic + '-value').schema.schema_str
        topic = self.granule_topic

    metadata_deserializer = AvroDeserializer(metadata_schema, registry_client)

    consumer_conf = {'bootstrap.servers': self.brokers}

    if self.security:
        consumer_conf['security.protocol'] = 'SSL'
        consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
        consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
        consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']

    # Copy rather than alias, so the deserializer settings below don't
    # silently mutate consumer_conf as well (the original assigned the
    # same dict to both names).
    meta_consumer_conf = consumer_conf.copy()
    meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
    meta_consumer_conf['value.deserializer'] = metadata_deserializer
    meta_consumer_conf['group.id'] = self.group_id
    meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset

    metadata_consumer = DeserializingConsumer(meta_consumer_conf)
    metadata_consumer.subscribe([topic])
    return metadata_consumer
class Consumer:
    def __init__(self, bootstrap_servers: str, topic: str, group: str,
                 callback: Callable[[Message], None],
                 value_deserializer=None, poll_timeout: float = 1.0,
                 config=None):
        consumer_config = {
            "bootstrap.servers": bootstrap_servers,
            "group.id": group,
            "value.deserializer": value_deserializer
        }
        if config:
            consumer_config.update(config)
        self.consumer = DeserializingConsumer(consumer_config)
        self.topic = topic
        self.callback = callback
        self.poll_timeout = poll_timeout

    def start(self):
        logger.info("Starting Kafka consumer")
        self.consumer.subscribe([self.topic])
        while True:
            message = self.consumer.poll(self.poll_timeout)
            if message is None:
                continue
            if message.error():
                # Use the module logger rather than print, for consistency
                # with the rest of the class.
                logger.error(f"Consumer error: {message.error()}")
                continue
            self.callback(message)

    def close(self):
        logger.info("Closing Kafka consumer")
        self.consumer.close()
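# Hypothetical wiring of the Consumer class above; the handler, broker
# address, topic, and group are placeholders, not values from the source.
from confluent_kafka import Message
from confluent_kafka.serialization import StringDeserializer

def handle(message: Message) -> None:
    print(message.key(), message.value())

consumer = Consumer(
    bootstrap_servers='localhost:9092',
    topic='orders',
    group='order-readers',
    callback=handle,
    value_deserializer=StringDeserializer('utf_8'))
consumer.start()  # blocks, polling and dispatching each message to handle()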
def __init__(self, value_schema, topic_name="kafka-avro-producer",
             groupID='KafkaAvroConsumer', autocommit=True):

    # Schema Registry configuration
    self.schema_registry_conf = self.getSchemaRegistryConf()
    # Schema Registry Client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

    # Key Deserializer
    self.key_deserializer = StringDeserializer('utf_8')
    # Value Deserializer
    # Presenting the schema to the Avro Deserializer is needed at the moment.
    # In the future it might change:
    # https://github.com/confluentinc/confluent-kafka-python/issues/834
    self.value_deserializer = AvroDeserializer(value_schema,
                                               self.schema_registry_client)

    # Get the consumer configuration
    self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)
    # Create the consumer
    self.consumer = DeserializingConsumer(self.consumer_conf)
    # Subscribe to the topic
    self.consumer.subscribe([topic_name])
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }
    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager, msg.value())).start()
            except ConsumeError as e:
                print(f'[Exception] error_code: {e.code()} '
                      f'message: {e.message()} exception: {e}')
    finally:
        consumer.close()
def consume(consumer: DeserializingConsumer, timeout) -> Iterator[Message]:
    # The original annotated the return type as `iter` (the builtin function);
    # Iterator[Message] (typing / confluent_kafka) is the intended type.
    while True:
        # Wait for a message until the timeout is reached; if a message
        # arrives before then, it is returned.
        message = consumer.poll(timeout)
        # print('[kafka] polling...')

        if message is None:
            continue
        if message.error():
            print('Consumer error: {}'.format(message.error()))
            continue

        yield message
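# Driving the consume() generator above; the consumer construction and topic
# name are assumptions for illustration.
from confluent_kafka import DeserializingConsumer

consumer = DeserializingConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'reader',
})
consumer.subscribe(['some-topic'])

for message in consume(consumer, timeout=1.0):
    print(message.key(), message.value())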
def __init__(self, consumer_name, value_schema,
             topic_name="kafka-avro-producer",
             groupID='KafkaAvroConsumer', autocommit=True):

    # Consumer name for logging purposes
    self.logging_prefix = '[' + consumer_name + '][KafkaAvroConsumer]'

    # Schema Registry configuration
    self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
    # Schema Registry Client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

    # Key Deserializer
    self.key_deserializer = StringDeserializer('utf_8')

    # Get the schema for the value
    self.schema_id_value = self.schema_registry_client.get_latest_version(
        topic_name + "-value").schema_id
    # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
    self.value_schema = self.schema_registry_client.get_schema(
        self.schema_id_value).schema_str
    print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
    print(self.logging_prefix + ' - Value Schema:')
    print(self.logging_prefix + ' - -------------\n')
    print(self.logging_prefix + ' - ' + self.value_schema + '\n')

    # Value Deserializer
    # Presenting the schema to the Avro Deserializer is needed at the moment.
    # In the future it might change:
    # https://github.com/confluentinc/confluent-kafka-python/issues/834
    self.value_deserializer = AvroDeserializer(self.value_schema,
                                               self.schema_registry_client)

    # Get the consumer configuration
    self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(
        groupID, autocommit, self.key_deserializer, self.value_deserializer)
    # Create the consumer
    self.consumer = DeserializingConsumer(self.consumer_conf)

    # Print the consumer configuration
    EventBackboneConfig.printConsumerConfiguration(
        self.logging_prefix, self.consumer_conf, self.schema_registry_conf['url'])

    # Subscribe to the topic
    self.consumer.subscribe([topic_name])
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        # Random suffix so each run gets its own consumer group. The original
        # used str(random.Random()), which yields an object repr rather than
        # a random number.
        'group.id': args.group + str(random.randint(0, 10 ** 6)),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }
    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)
        except KeyboardInterrupt:
            # Without this clause, KeyboardInterrupt escaped the loop and
            # consumer.close() below was unreachable.
            break
        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})
    avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")
                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(tenant, AIRQO_BASE_URL)

                        group_measurements = list(group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements),
                                       int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[
                                i:i + int(REQUEST_BODY_SIZE)]
                            device_registry.insert_events(measurements_list)
                except Exception as ex:
                    print(ex)
        except KeyboardInterrupt:
            break

    consumer.close()
def __new__(cls):
    # Consumer configuration. Must match Strimzi/Kafka configuration.
    config = {
        'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
        'client.id': socket.gethostname(),
        'group.id': "text-preprocessor",
        'auto.offset.reset': "earliest",
        'session.timeout.ms': 10000,
        'enable.auto.commit': True,       # default
        'auto.commit.interval.ms': 5000,  # default
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': StringDeserializer('utf_8')
    }
    return DeserializingConsumer(config)
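# Because __new__ above returns a DeserializingConsumer rather than an
# instance of the enclosing class, instantiating that class hands back the
# configured consumer directly. The class name below is hypothetical; it is
# not shown in the snippet above.
consumer = TextPreprocessorConsumer()
consumer.subscribe(['some-topic'])
msg = consumer.poll(1.0)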
def __init__(self, db: SummaryDAOFactory):
    super(ConsumerLoop, self).__init__()

    logging.basicConfig(
        format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
        level=logging.DEBUG,
        datefmt='%d/%m/%Y %I:%M:%S %p')
    self.logger = logging.getLogger("DispatcherConsumerLoop")

    # Consumer configuration. Must match Strimzi/Kafka configuration.
    config = {
        'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
        'client.id': socket.gethostname(),
        'group.id': "dispatcher",
        'auto.offset.reset': "earliest",
        'session.timeout.ms': 10000,
        'enable.auto.commit': True,       # default
        'auto.commit.interval.ms': 5000,  # default
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': StringDeserializer('utf_8')
    }
    self.consumer = DeserializingConsumer(config)
    self.db = db
    self.consumed_msg_schema = TextPostprocessingConsumedMsgSchema()
def main(args):
    topic = args.topic

    key_schema_str = open('schema/KeySchema.avsc', "r").read()
    value_schema_str = open('schema/ValueSchema.avsc', "r").read()

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_key_deserializer = AvroDeserializer(key_schema_str,
                                             schema_registry_client,
                                             dict_to_user_quote_key)
    avro_value_deserializer = AvroDeserializer(value_schema_str,
                                               schema_registry_client,
                                               dict_to_user_quote_value)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': avro_key_deserializer,
        'value.deserializer': avro_value_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user_quote = msg.value()
            if user_quote is not None:
                print("User {} Quote record: product_id: {}\n"
                      "\tquoted_price: {}\n"
                      "\tquoted_quantity: {}\n"
                      "\tuser_note: {}\n".format(msg.key().user_id,
                                                 user_quote.product_id,
                                                 user_quote.quoted_price,
                                                 user_quote.quoted_quantity,
                                                 user_quote.user_note))
        except KeyboardInterrupt:
            break

    consumer.close()
def main():
    top = 20
    consumer = DeserializingConsumer({
        'bootstrap.servers': os.environ['KAFKA_BROKERS'],
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'SCRAM-SHA-512',
        'sasl.password': os.environ['KAFKA_PASS'],
        'sasl.username': os.environ['KAFKA_USER'],
        'ssl.ca.location': '/usr/local/share/ca-certificates/Yandex/YandexCA.crt',
        'group.id': 'group1',
        'key.deserializer': StringDeserializer(),
        'value.deserializer': LongDeserializer(),
    })

    consumer.subscribe(['streams-wordcount-output'])

    try:
        frequencies = []
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                if frequencies:
                    print('==============================================')
                    print(f'Current list of top {top} most frequent words:')
                    frequencies = sorted(frequencies, key=lambda x: x[1],
                                         reverse=True)
                    for frequency in frequencies[0:top]:
                        print(f'{frequency[0]}: {frequency[1]}')
                    frequencies.clear()
                continue
            elif msg.error():
                print('error: {}'.format(msg.error()))
            else:
                frequencies.append((msg.key(), msg.value()))
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()