import json
import sys

from confluent_kafka import DeserializingConsumer, KafkaError, KafkaException
from confluent_kafka.serialization import StringDeserializer


def main():
    string_deserializer = StringDeserializer('utf_8')
    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',  # legacy alias for 'earliest'
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }

    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            # poll() takes the timeout in seconds, not milliseconds
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    # Keep only the ten highest-priced transactions seen so far
                    messages = sorted(messages, key=lambda x: x.price, reverse=True)[0:10]
                print(messages)
                consumer.commit(asynchronous=False)
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
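The loop above sorts on transaction.price but leaves dict_to_transaction undefined. A minimal sketch of such a helper, assuming the JSON payload carries price, amount, and timestamp fields; the Transaction dataclass and its field set are hypothetical:

from dataclasses import dataclass


# Hypothetical transaction model; the real field set depends on the producer's payload.
@dataclass
class Transaction:
    price: float
    amount: float
    timestamp: int


def dict_to_transaction(obj: dict) -> Transaction:
    # Map the deserialized JSON dict onto the dataclass fields,
    # falling back to zero for any missing key
    return Transaction(price=float(obj.get('price', 0.0)),
                       amount=float(obj.get('amount', 0.0)),
                       timestamp=int(obj.get('timestamp', 0)))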
import threading

from confluent_kafka import DeserializingConsumer, KafkaException
from confluent_kafka.error import ConsumeError
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer

# config, schemas and handlers are modules provided by the surrounding project


def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': 'runs-consumers',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')
    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)
        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # Handle each run on its own thread so the poll loop is not blocked
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager, msg.value())).start()
            except ConsumeError as e:
                print(f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}')
    finally:
        consumer.close()
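AvroDeserializer takes the writer schema as a JSON string; schemas.run_record_schema is defined elsewhere in this project. A hypothetical example of what it might hold, with illustrative field names only:

# Hypothetical Avro schema for a "run" record; the real fields live in the
# project's schemas module.
run_record_schema = """
{
    "namespace": "runs",
    "name": "RunRecord",
    "type": "record",
    "fields": [
        {"name": "run_id", "type": "string"},
        {"name": "image", "type": "string"},
        {"name": "created_at", "type": "long"}
    ]
}
"""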
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer

# EventBackboneConfig and POLL_TIMEOUT are provided by the surrounding module


class KafkaAvroConsumer:

    def __init__(self, consumer_name, value_schema,
                 topic_name="kafka-avro-producer",
                 groupID='KafkaAvroConsumer', autocommit=True):
        # Consumer name for logging purposes
        self.logging_prefix = '[' + consumer_name + '][KafkaAvroConsumer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # Key Deserializer
        self.key_deserializer = StringDeserializer('utf_8')

        # Get the latest registered schema for the value
        self.schema_id_value = self.schema_registry_client.get_latest_version(
            topic_name + "-value").schema_id
        self.value_schema = self.schema_registry_client.get_schema(
            self.schema_id_value).schema_str

        print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
        print(self.logging_prefix + ' - Value Schema:')
        print(self.logging_prefix + ' - -------------\n')
        print(self.logging_prefix + ' - ' + self.value_schema + '\n')

        # Value Deserializer
        # Presenting the schema to the Avro Deserializer is needed at the moment.
        # In the future it might change:
        # https://github.com/confluentinc/confluent-kafka-python/issues/834
        self.value_deserializer = AvroDeserializer(self.value_schema,
                                                   self.schema_registry_client)

        # Get the consumer configuration
        self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(
            groupID, autocommit, self.key_deserializer, self.value_deserializer)
        # Create the consumer
        self.consumer = DeserializingConsumer(self.consumer_conf)
        # Print consumer configuration
        EventBackboneConfig.printConsumerConfiguration(
            self.logging_prefix, self.consumer_conf, self.schema_registry_conf['url'])
        # Subscribe to the topic
        self.consumer.subscribe([topic_name])

    def traceResponse(self, msg):
        print(self.logging_prefix +
              ' - New event received\n\tTopic: {}\n\tPartition: {}\n\tOffset: {}\n\tkey: {}\n\tvalue: {}\n'
              .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    # Polls for the next event and returns its deserialized value
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=POLL_TIMEOUT)
        # Validate the returned message
        if msg is None:
            print(self.logging_prefix + ' - [INFO] - No new messages on the topic')
            return None
        elif msg.error():
            # msg.error() is a KafkaError; compare against its string form
            if "PARTITION_EOF" in str(msg.error()):
                print(self.logging_prefix + ' - [INFO] - End of partition')
            else:
                print(self.logging_prefix + ' - [ERROR] - Consumer error: {}'.format(msg.error()))
            return None
        else:
            # Print the message
            self.traceResponse(msg)
            return msg.value()

    # Polls for the next event but returns the raw message
    def pollNextRawEvent(self):
        records = self.consumer.poll(timeout=POLL_TIMEOUT)
        if records is None:
            return None
        if records.error():
            # Stop reading if we find end of partition in the error message
            if "PARTITION_EOF" in str(records.error()):
                return None
            print(self.logging_prefix + ' - [ERROR] - Consumer error: {}'.format(records.error()))
            return None
        self.traceResponse(records)
        return records

    def commitEvent(self, event):
        self.consumer.commit(event)

    def close(self):
        self.consumer.close()
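A minimal sketch of how the class might be driven, with manual commits enabled; the consumer name and topic are illustrative, and value_schema can be None since __init__ fetches the schema from the registry:

# Hypothetical driver for KafkaAvroConsumer; names are illustrative.
consumer = KafkaAvroConsumer('OrdersConsumer', None,
                             topic_name='orders', autocommit=False)
try:
    while True:
        event = consumer.pollNextRawEvent()
        if event is not None:
            # Commit only once the event has been handled successfully
            consumer.commitEvent(event)
finally:
    consumer.close()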
from confluent_kafka import DeserializingConsumer, TopicPartition
from sqlalchemy import create_engine

consumer_config = {
    'bootstrap.servers': conf['bootstrap.servers'],
    'key.deserializer': key_avro_deserializer,
    'value.deserializer': value_avro_deserializer,
    'group.id': '1'
}
consumer = DeserializingConsumer(consumer_config)

# Create the SQL interface
# (note: SQLAlchemy 1.4+ expects the "postgresql://" scheme)
db = create_engine("postgres://*****:*****@postgres:5432/ngrams")

# Wait until the Kafka topic is up before proceeding
kafka_utils.wait_topic(consumer, topic)

# Assign partitions to the consumer; maybe this should change to subscribe()
# so we can use the on_assign and on_revoke callbacks
partitions = []
partition = TopicPartition(topic=topic, partition=0, offset=0)
partitions.append(partition)
consumer.assign(partitions)

# There is an infinite loop within this function that won't break
# until it sees a keyboard interrupt
msg = kafka_utils.consume_messages(consumer, db, kafka_utils.send_ngrams_postgres)

# After exiting the poll loop, commit the final offsets and close the consumer
consumer.commit(message=msg, asynchronous=False)
consumer.close()

# Finally disconnect from the Redis server
r.connection_pool.disconnect()
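The poll loop itself is hidden inside kafka_utils.consume_messages. A sketch of the pattern the comments describe, assuming the handler takes the database engine and the deserialized value; the function body and handler signature are assumptions, not the project's actual code:

from confluent_kafka import KafkaException


def consume_messages(consumer, db, handler):
    # Hypothetical reconstruction of the hidden poll loop; returns the last
    # processed message so the caller can commit its offset.
    msg = None
    try:
        while True:
            candidate = consumer.poll(timeout=1.0)
            if candidate is None:
                continue
            if candidate.error():
                raise KafkaException(candidate.error())
            handler(db, candidate.value())
            msg = candidate
    except KeyboardInterrupt:
        pass
    return msg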