def __init__(self, consumer=None):
    super().__init__()
    self.logger = logging.getLogger(__package__)
    self.logger.debug("Initializing the consumer")
    self.consumer = consumer
    self.message_handler = HandleMessage()
    self._stop_event = threading.Event()
    # Track whether there is currently a message being processed. Just a raw
    # bool is OK because the subscription is configured to prefetch 1
    # message at a time - i.e. this function should NOT run in parallel.
    self._processing = False
    while self.consumer is None:
        try:
            self.logger.debug("Getting the kafka consumer")
            config = kafka_config_from_env()
            config['key.deserializer'] = StringDeserializer('utf_8')
            config['value.deserializer'] = StringDeserializer('utf_8')
            config['on_commit'] = self.on_commit
            config['group.id'] = GROUP_ID
            config['auto.offset.reset'] = 'earliest'
            self.consumer = DeserializingConsumer(config)
        except KafkaException as err:
            self.logger.error("Could not initialize the consumer: %s", err)
            raise ConnectionException(
                "Could not initialize the consumer") from err
    self.consumer.subscribe([TRANSACTIONS_TOPIC])
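The attributes set here (the stop event, the processing flag, the message handler) suggest a worker-thread loop, which the snippet does not show. A hedged sketch of what such a loop might look like; the method name `run`, the handler call `handle(...)`, and the shutdown behaviour are assumptions, not part of the source:

def run(self):
    # Hypothetical loop: poll until _stop_event is set; track _processing so a
    # shutdown can wait for the single in-flight message (prefetch is 1).
    while not self._stop_event.is_set():
        msg = self.consumer.poll(1.0)
        if msg is None or msg.error():
            continue
        self._processing = True
        try:
            self.message_handler.handle(msg)  # placeholder: real handler API not shown above
        finally:
            self._processing = False
    self.consumer.close()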
@dataclass
class Configuration:
    commit_log_topic: str
    snapshot_topic: str
    bootstrap_servers: str
    group_id: str
    batch_timeout_sec: int = 5
    messages_per_transaction: int = 2000
    store_consumer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'group.id': None,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
            'enable.auto.offset.store': False,
            'enable.partition.eof': True,
            'key.deserializer': StringDeserializer(),
            'value.deserializer': JSONDeserializer(),
            # 'stats_cb': publish_statistics,
            # 'statistics.interval.ms': 15000,
        })
    consumer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'group.id': None,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
            'enable.auto.offset.store': False,
            'enable.partition.eof': False,
            'key.deserializer': StringDeserializer(),
            'value.deserializer': JSONDeserializer(),
            # 'value.deserializer': DummyDeserializer(),
            # 'stats_cb': publish_statistics,
            # 'statistics.interval.ms': 15000,
        })
    producer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'transactional.id': None,
            'transaction.timeout.ms': 60000,
            'enable.idempotence': True,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': JSONSerializer(),
            'debug': 'broker,eos',
        })

    def __post_init__(self):
        self.store_consumer['bootstrap.servers'] = \
            self.consumer['bootstrap.servers'] = \
            self.producer['bootstrap.servers'] = \
            self.bootstrap_servers
        self.store_consumer['group.id'] = \
            self.consumer['group.id'] = \
            self.producer['transactional.id'] = \
            self.group_id
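For illustration, a minimal usage sketch of the dataclass above; the broker address, group id, and topic names are placeholders, not values from the source. It shows how `__post_init__` fans the shared settings out into the three client dicts:

# Hypothetical usage: all literal values below are placeholders.
cfg = Configuration(
    commit_log_topic='commit-log',
    snapshot_topic='snapshots',
    bootstrap_servers='localhost:9092',
    group_id='example-group',
)
# __post_init__ copied the shared settings into every client config.
assert cfg.consumer['bootstrap.servers'] == 'localhost:9092'
assert cfg.store_consumer['group.id'] == 'example-group'
assert cfg.producer['transactional.id'] == 'example-group'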
def __new__(cls):
    # Consumer configuration. Must match Strimzi/Kafka configuration.
    config = {
        'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
        'client.id': socket.gethostname(),
        'group.id': "text-preprocessor",
        'auto.offset.reset': "earliest",
        'session.timeout.ms': 10000,
        'enable.auto.commit': True,       # default
        'auto.commit.interval.ms': 5000,  # default
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': StringDeserializer('utf_8')
    }
    return DeserializingConsumer(config)
def receive():
    json_deserializer = JSONDeserializer(USER_SCHEMA, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': 'localhost:9092',
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': 'django-kafka',
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([USER_TOPIC])

    """
    The idea is to start the Kafka consumer when the message is sent to the
    Kafka producer, resulting in two queues: a task queue and a
    message/content queue. Multi-threading might be overkill for a simple
    application, hence the (temporary) bounded for loop.
    """
    for x in range(200):
        try:
            msg = consumer.poll(timeout=5.0)
            if msg is not None:
                user = msg.value()
                if user is not None:
                    print("User record {}: username: {}\n"
                          "\tdata: {}\n".format(msg.key(), user.username,
                                                user.data))
        except Exception as e:
            print('An exception occurred: {}'.format(e))
            logging.error(traceback.format_exc())
def from_file(cls, config_file_path, **kwargs):
    """
    config_file_path = path to the config file to use in defining this consumer

    Possible keyword arguments:
    logger = the logger object to use

    !!!!! any other keyword arguments will be added to the configuration
    (with underscores replaced with dots) !!!!!
    """
    parser = ConfigFileParser(config_file_path, **kwargs)
    configs = parser.get_config_dict_for_groups(['cluster', 'consumer'])
    for argname, arg in kwargs.items():
        if argname == 'logger':
            continue
        configs[argname.replace('_', '.')] = arg
    # if the group.id has been set as "new", generate a new group ID
    if 'group.id' in configs.keys() and configs['group.id'].lower() == 'new':
        configs['group.id'] = str(uuid.uuid1())
    # if one of several recognized deserializers has been given as a config
    # parameter for the key/value deserializer, replace it with the actual class
    names_to_classes = {
        'DoubleDeserializer': DoubleDeserializer(),
        'IntegerDeserializer': IntegerDeserializer(),
        'StringDeserializer': StringDeserializer(),
        'DataFileChunkDeserializer': DataFileChunkDeserializer(),
    }
    configs_to_check = ['key.deserializer', 'value.deserializer']
    for cfg in configs_to_check:
        if cfg in configs.keys():
            if configs[cfg] in names_to_classes:
                configs[cfg] = names_to_classes[configs[cfg]]
    return cls(configs)
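A hedged usage sketch of the classmethod above; the consumer class name and the config file path are hypothetical, but the keyword handling (underscores to dots, `group_id='new'` producing a fresh UUID, deserializer names mapped to instances) follows directly from the code:

# Hypothetical usage: MyConsumer and 'test.config' are illustrative names only.
consumer = MyConsumer.from_file(
    'test.config',
    group_id='new',                           # stored as configs['group.id'] -> fresh uuid1()
    value_deserializer='StringDeserializer',  # replaced with a StringDeserializer() instance
)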
def consume(self, count: int):
    consumer = DeserializingConsumer({
        'bootstrap.servers': self.brokers,
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': self._make_deserializer(),
        'group.id': self.group,
        'auto.offset.reset': "earliest"
    })
    consumer.subscribe([self.topic])
    self.logger.info("Consuming %d %s records from topic %s with group %s",
                     count, self.schema_type.name, self.topic, self.group)

    while self.consumed < count:
        msg = consumer.poll(1)
        if msg is None:
            continue
        payload = msg.value()
        self.logger.debug("Consumed %d at %d", payload.val, msg.offset())
        assert payload.val == self.consumed
        self.consumed += 1

    consumer.close()
def test_string_serialization(kafka_cluster, data, codec):
    """
    Tests basic unicode serialization/deserialization functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        data (unicode): input data
        codec (str): encoding type
    """
    topic = kafka_cluster.create_topic("serialization-string")

    producer = kafka_cluster.producer(value_serializer=StringSerializer(codec))
    producer.produce(topic, value=data)
    producer.flush()

    consumer = kafka_cluster.consumer(
        value_deserializer=StringDeserializer(codec))
    consumer.subscribe([topic])

    msg = consumer.poll()
    assert msg.value() == data

    consumer.close()
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_number}\n"
                      f"\tfavorite_color: {user.favorite_color}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
def main(args):
    topic = args.topic

    protobuf_deserializer = ProtobufDeserializer(user_pb2.User)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': protobuf_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": ["name", "favorite_number", "favorite_color"]
    }
    """
    json_deserializer = JSONDeserializer(schema_str, from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n".format(msg.key(), user.name,
                                                      user.favorite_number,
                                                      user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
def create_deserializer(self):
    self.deserializer = {}
    if self.in_topic is not None:
        for topic in self.in_topic:
            if self.in_schema[topic] is None:
                self.deserializer[topic] = StringDeserializer("utf_8")
            else:
                schema_str = self.in_schema[topic].schema_str
                self.deserializer[topic] = AvroDeserializer(
                    schema_str, self.schema_registry)
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }
    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)
        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})
    avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")
                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(tenant, AIRQO_BASE_URL)

                        group_measurements = list(group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements),
                                       int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[
                                i:i + int(REQUEST_BODY_SIZE)]
                            device_registry.insert_events(measurements_list)
                except Exception as ex:
                    print(ex)
        except KeyboardInterrupt:
            break

    consumer.close()
def main(args):
    topic = args.topic

    schema_str = """
    {
      "namespace": "confluent.io.examples.serialization.avro",
      "name": "User",
      "type": "record",
      "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": "int"},
        {"name": "favorite_color", "type": "string"}
      ]
    }
    """
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
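Several of these examples pass a `dict_to_user` callback that is not shown. A minimal sketch of what such a helper and its `User` container typically look like; the field handling mirrors the Avro schema above, and the `(obj, ctx)` signature matches how the deserializer invokes the callback, but the exact class is an assumption:

# Hypothetical sketch of the dict_to_user helper assumed by the examples above.
class User(object):
    def __init__(self, name=None, favorite_number=None, favorite_color=None):
        self.name = name
        self.favorite_number = favorite_number
        self.favorite_color = favorite_color


def dict_to_user(obj, ctx):
    # ctx is the SerializationContext supplied by the deserializer; unused here.
    if obj is None:
        return None
    return User(name=obj['name'],
                favorite_number=obj['favorite_number'],
                favorite_color=obj['favorite_color'])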
def __init__(self, db: SummaryDAOFactory):
    super(ConsumerLoop, self).__init__()

    logging.basicConfig(
        format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
        level=logging.DEBUG,
        datefmt='%d/%m/%Y %I:%M:%S %p')
    self.logger = logging.getLogger("DispatcherConsumerLoop")

    # Consumer configuration. Must match Strimzi/Kafka configuration.
    config = {
        'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
        'client.id': socket.gethostname(),
        'group.id': "dispatcher",
        'auto.offset.reset': "earliest",
        'session.timeout.ms': 10000,
        'enable.auto.commit': True,       # default
        'auto.commit.interval.ms': 5000,  # default
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': StringDeserializer('utf_8')
    }
    self.consumer = DeserializingConsumer(config)
    self.db = db
    self.consumed_msg_schema = TextPostprocessingConsumedMsgSchema()
def main():
    top = 20

    consumer = DeserializingConsumer({
        'bootstrap.servers': os.environ['KAFKA_BROKERS'],
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'SCRAM-SHA-512',
        'sasl.password': os.environ['KAFKA_PASS'],
        'sasl.username': os.environ['KAFKA_USER'],
        'ssl.ca.location': '/usr/local/share/ca-certificates/Yandex/YandexCA.crt',
        'group.id': 'group1',
        'key.deserializer': StringDeserializer(),
        'value.deserializer': LongDeserializer(),
    })

    consumer.subscribe(['streams-wordcount-output'])

    try:
        frequencies = []
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                if frequencies:
                    print('==============================================')
                    print(f'Current list of top {top} most frequent words:')
                    frequencies = sorted(frequencies, key=lambda x: x[1],
                                         reverse=True)
                    for frequency in frequencies[0:top]:
                        print(f'{frequency[0]}: {frequency[1]}')
                    frequencies.clear()
                continue
            elif msg.error():
                print('error: {}'.format(msg.error()))
            else:
                frequencies.append((msg.key(), msg.value()))
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()
def getConfigs():
    value_deserializer = ProtobufDeserializer(FoodPreferences_pb2.PersonFood)

    configs = {
        'bootstrap.servers': '<CCLOUD_DNS>',
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'PLAIN',
        'sasl.username': '******',
        'sasl.password': '******',
        'group.id': 'consumingPythonWorld',
        'client.id': 'pythonConsumption',
        'key.deserializer': StringDeserializer('utf_8'),
        'value.deserializer': value_deserializer
    }

    return configs
def main():
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'bitcoin_group',
        'key.deserializer': string_deserializer,
        'value.deserializer': string_deserializer,
        'session.timeout.ms': 6000,
        'fetch.wait.max.ms': 5000,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': 'false',
        'fetch.min.bytes': 307200
    }

    consumer = DeserializingConsumer(conf)
    consumer.subscribe(['bitcoin-transaction'])
    messages = []
    try:
        while True:
            msg = consumer.poll(timeout=1000)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                obj = json.loads(msg.value())
                transaction = dict_to_transaction(obj)
                messages.append(transaction)
                if len(messages) > 100:
                    messages = sorted(messages, key=lambda x: x.price,
                                      reverse=True)[0:10]
                    print(messages)
                consumer.commit(asynchronous=False)
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
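The loop above relies on a `dict_to_transaction()` helper and a `Transaction` type with a `price` attribute, neither of which is shown. A minimal sketch under those assumptions; only `price` is actually required by the sort, the other fields are illustrative:

from dataclasses import dataclass


@dataclass
class Transaction:
    # Only 'price' is referenced by the sort above; the rest is illustrative.
    price: float
    volume: float = 0.0
    timestamp: str = ''


def dict_to_transaction(obj: dict) -> Transaction:
    return Transaction(price=float(obj.get('price', 0.0)),
                       volume=float(obj.get('volume', 0.0)),
                       timestamp=str(obj.get('timestamp', '')))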
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager, msg.value())).start()
            except ConsumeError as e:
                print(f'[Exception] error_code: {e.code()} '
                      f'message: {e.message()} exception: {e}')
    finally:
        consumer.close()
def main(args):
    topic = args.topic

    schema_str = MetricSchema
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    client = InfluxDBClient(host=args.host_influx, port=8086,
                            username='******', password='******')

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            timespent = msg.value()
            if timespent is not None:
                print("time ==>", timespent)
                print(timespent["metricName"])
                print(timespent["time"])
                client.switch_database('datascience')
                json_body = [{
                    "measurement": "metric",
                    "fields": {
                        "name": timespent["metricName"],
                        "value": timespent["time"]
                    }
                }]
                client.write_points(json_body)
        except KeyboardInterrupt:
            break

    consumer.close()
def create_consumer(self, registry_client):
    """
    Subscribes to the topic defined in the configs and creates a consumer
    to deserialize messages from that topic.

    :param registry_client: SchemaRegistryClient object, obtained from
        register_client()

    :return: DeserializingConsumer object
    """
    metadata_schema = None
    topic = None
    if self.metadata_type == "COLLECTION":
        metadata_schema = registry_client.get_latest_version(
            self.collection_topic + '-value').schema.schema_str
        topic = self.collection_topic

    if self.metadata_type == "GRANULE":
        metadata_schema = registry_client.get_latest_version(
            self.granule_topic + '-value').schema.schema_str
        topic = self.granule_topic

    metadata_deserializer = AvroDeserializer(metadata_schema, registry_client)

    consumer_conf = {'bootstrap.servers': self.brokers}

    if self.security:
        consumer_conf['security.protocol'] = 'SSL'
        consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
        consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
        consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']

    meta_consumer_conf = consumer_conf
    meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
    meta_consumer_conf['value.deserializer'] = metadata_deserializer
    meta_consumer_conf['group.id'] = self.group_id
    meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset

    metadata_consumer = DeserializingConsumer(meta_consumer_conf)
    metadata_consumer.subscribe([topic])
    return metadata_consumer
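A hedged sketch of how the returned consumer might be driven; `publisher` is a hypothetical stand-in for the object that defines `create_consumer()`, and `register_client()` is the registry helper the docstring refers to:

# Hypothetical usage: 'publisher' is a placeholder for the owning object.
registry_client = publisher.register_client()            # per the docstring above
metadata_consumer = publisher.create_consumer(registry_client)

while True:
    msg = metadata_consumer.poll(1.0)
    if msg is None:
        continue
    print(msg.key(), msg.value())                         # Avro-decoded metadata record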
def __init__(self, value_schema, topic_name="kafka-avro-producer",
             groupID='KafkaAvroConsumer', autocommit=True):

    # Schema Registry configuration
    self.schema_registry_conf = self.getSchemaRegistryConf()

    # Schema Registry Client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

    # Key Deserializer
    self.key_deserializer = StringDeserializer('utf_8')

    # Value Deserializer
    # Presenting the schema to the Avro Deserializer is needed at the moment.
    # In the future it might change:
    # https://github.com/confluentinc/confluent-kafka-python/issues/834
    self.value_deserializer = AvroDeserializer(value_schema,
                                               self.schema_registry_client)

    # Get the consumer configuration
    self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)

    # Create the consumer
    self.consumer = DeserializingConsumer(self.consumer_conf)

    # Subscribe to the topic
    self.consumer.subscribe([topic_name])
def __init__(self, consumer_name, value_schema, topic_name="kafka-avro-producer",
             groupID='KafkaAvroConsumer', autocommit=True):

    # Consumer name for logging purposes
    self.logging_prefix = '[' + consumer_name + '][KafkaAvroConsumer]'

    # Schema Registry configuration
    self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()

    # Schema Registry Client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

    # Key Deserializer
    self.key_deserializer = StringDeserializer('utf_8')

    # Get the schema for the value
    self.schema_id_value = self.schema_registry_client.get_latest_version(
        topic_name + "-value").schema_id
    # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
    self.value_schema = self.schema_registry_client.get_schema(
        self.schema_id_value).schema_str
    print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
    print(self.logging_prefix + ' - Value Schema:')
    print(self.logging_prefix + ' - -------------\n')
    print(self.logging_prefix + ' - ' + self.value_schema + '\n')

    # Value Deserializer
    # Presenting the schema to the Avro Deserializer is needed at the moment.
    # In the future it might change:
    # https://github.com/confluentinc/confluent-kafka-python/issues/834
    self.value_deserializer = AvroDeserializer(self.value_schema,
                                               self.schema_registry_client)

    # Get the consumer configuration
    self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(
        groupID, autocommit, self.key_deserializer, self.value_deserializer)

    # Create the consumer
    self.consumer = DeserializingConsumer(self.consumer_conf)

    # Print consumer configuration
    EventBackboneConfig.printConsumerConfiguration(
        self.logging_prefix, self.consumer_conf, self.schema_registry_conf['url'])

    # Subscribe to the topic
    self.consumer.subscribe([topic_name])
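A hedged usage sketch for the Avro consumer wrapper above. The class name `KafkaAvroConsumer` is inferred from the logging prefix and default group id, the topic and group names are placeholders, and the wrapper's own polling helpers are not shown, so this drives the underlying `self.consumer` directly:

# Hypothetical driver: names and topic are illustrative only.
avro_consumer = KafkaAvroConsumer('orders-reader', value_schema=None,
                                  topic_name='orders', groupID='orders-group')
try:
    while True:
        msg = avro_consumer.consumer.poll(1.0)   # underlying DeserializingConsumer
        if msg is None:
            continue
        print(msg.key(), msg.value())            # value already Avro-decoded
except KeyboardInterrupt:
    pass
finally:
    avro_consumer.consumer.close()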
def receive_record(args):
    """Receives a record using a DeserializingConsumer & AvroDeserializer."""
    topics = [args.topic.rstrip()]

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_deserializer = AvroDeserializer(schema_registry_client,
                                         DATA_SCHEMA,
                                         dict_to_data)
    string_deserializer = StringDeserializer('utf_8')

    consumer_config = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': 'earliest'
    }

    consumer = DeserializingConsumer(consumer_config)
    consumer.subscribe(topics)

    print(f'Consuming data records from topic(s) {topics}. ^C to exit.')
    while True:
        try:
            # SIGINT can't be handled while polling; keep the timeout bounded.
            msg = consumer.poll(10.0)
            if msg is None:
                print('\t---Waiting. . .')
                continue

            data = msg.value()
            if data is not None:
                print(f'Data record {msg.key()}:\n'
                      f'\tValues: {data}')
        except KeyboardInterrupt:
            break

    print('\nClosing consumer.')
    consumer.close()
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    schema_obj = schema_registry_client.get_latest_version(
        subject_name='example_serde_json-value')

    json_deserializer = JSONDeserializer(schema_obj.schema.schema_str,
                                         from_dict=dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': json_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n name: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_number}\n"
                      f"\tfavorite_color: {user.favorite_color}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
def __init__(
    self,
    topic,
    value_deserializer,
    num_workers,
    num_threads,
    kafka_config,
    schema_registry_url,
    callback,
):
    logger.debug(
        f'Create a kafka consumer for topic {topic} '
        f'with {num_workers} workers and {num_threads} threads',
    )
    self.topic = topic
    self.num_threads = num_threads
    self.num_workers = num_workers
    self.kafka_config = kafka_config
    self.kafka_config = {
        'key.deserializer': StringDeserializer('utf_8'),
        'enable.auto.commit': False,
        'auto.offset.reset': 'latest',
        'value.deserializer': make_deserializer(
            topic=self.topic,
            from_dict=value_deserializer.from_dict,
            schema_registry_url=schema_registry_url,
        ),
    }
    self.kafka_config.update(kafka_config)
    self.workers = []
    self.callback = callback
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')
    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }
    producer = SerializingProducer(producer_conf)

    loop = asyncio.get_event_loop()

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            print("msg=>", evt)

            def enrich(evt):
                print("evt", evt)
                if evt is not None:
                    print("retrieved from kafka")
                    row = session.execute(
                        GET_ENRICHED_DATA_QUERY,
                        (evt["EventHeader"]["acteurDeclencheur"]["idPersonne"],)
                    ).one()
                    if row:
                        evt['EnrichedData'] = row
                        # evt['EventBusinessContext'] = evt["EventBusinessContext"][1]
                        EnrichedEvent = {
                            "eventId": evt["EventHeader"]["eventId"],
                            "dateTimeRef": evt["EventHeader"]["dateTimeRef"],
                            "nomenclatureEv": evt["EventHeader"]["nomenclatureEv"],
                            "canal": evt["EventHeader"]["canal"],
                            "media": evt["EventHeader"]["media"],
                            "schemaVersion": evt["EventHeader"]["schemaVersion"],
                            "headerVersion": evt["EventHeader"]["headerVersion"],
                            "serveur": evt["EventHeader"]["serveur"],
                            "adresseIP": evt["EventHeader"]["acteurDeclencheur"]["adresseIP"],
                            "idTelematique": evt["EventHeader"]["acteurDeclencheur"]["idTelematique"],
                            "idPersonne": evt["EventHeader"]["acteurDeclencheur"]["idPersonne"],
                            "dateNaissance": row["dateNaissance"],
                            "paysResidence": row["paysResidence"],
                            "paysNaissance": row["paysNaissance"],
                            "revenusAnnuel": row["revenusAnnuel"],
                            "csp": row["csp"],
                            "EventBusinessContext": evt["EventBusinessContext"]
                        }

                        producer.produce(topic=outputtopic,
                                         key=str(uuid4()),
                                         value=EnrichedEvent,
                                         on_delivery=delivery_report)
                        producer.flush()

            async_enrich = async_wrap(enrich)
            loop.run_until_complete(async_enrich(evt))

        except Exception:
            print('Exception')
            continue

    consumer.close()
import os
import time

from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.serialization import StringDeserializer
from jlab_jaws.avro.subject_schemas.serde import ActiveAlarmSerde
from jlab_jaws.eventsource.table import EventSourceTable
from tabulate import tabulate

from common import get_row_header

bootstrap_servers = os.environ.get('BOOTSTRAP_SERVERS', 'localhost:9092')

sr_conf = {'url': os.environ.get('SCHEMA_REGISTRY', 'http://localhost:8081')}
schema_registry_client = SchemaRegistryClient(sr_conf)

key_deserializer = StringDeserializer()
value_deserializer = ActiveAlarmSerde.deserializer(schema_registry_client)


def get_row(msg):
    timestamp = msg.timestamp()
    headers = msg.headers()
    key = msg.key()
    value = msg.value()

    if value is None:
        row = [key, None]
    else:
        row = [key, value]
    consumer = kafka_cluster.consumer(
        value_deserializer=StringDeserializer(codec))
    consumer.subscribe([topic])

    msg = consumer.poll()
    assert msg.value() == data

    consumer.close()


@pytest.mark.parametrize(
    "key_serializer, value_serializer, key_deserializer, value_deserializer, key, value",  # noqa: E501
    [(DoubleSerializer(), StringSerializer('utf_8'),
      DoubleDeserializer(), StringDeserializer(),
      -31.2168215450814477, u'Jämtland'),
     (StringSerializer('utf_16'), DoubleSerializer(),
      StringDeserializer('utf_16'), DoubleDeserializer(),
      u'Härjedalen', 1.2168215450814477)])
def test_mixed_serialization(kafka_cluster, key_serializer, value_serializer,
                             key_deserializer, value_deserializer,
                             key, value):
    """
    Tests basic mixed serializer/deserializer functionality.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        key_serializer (Serializer): serializer to test
        key_deserializer (Deserializer): deserializer to validate serializer
# Home Assistant. Control a light via the Home Assistant REST API.
# The Hops/Kafka part is inspired by Ahmad Al-Shishtawy's blog at
# https://www.logicalclocks.com/blog/using-an-external-python-kafka-client-to-interact-with-a-hopsworks-cluster
#
from confluent_kafka import DeserializingConsumer
from confluent_kafka.serialization import StringDeserializer
import toml, json
import requests

# Load the HopsWorks Kafka configuration - and the Home Assistant API token
conf = toml.load("config.toml")

# Initialize a simple String deserializer for the key and value
string_deserializer = StringDeserializer('utf_8')

# Initialize the consumer
consumer_conf = {
    'bootstrap.servers': conf['hops']['url'] + ':' + conf['kafka']['port'],
    'security.protocol': 'SSL',
    'ssl.ca.location': conf['project']['ca_file'],
    'ssl.certificate.location': conf['project']['certificate_file'],
    'ssl.key.location': conf['project']['key_file'],
    'ssl.key.password': conf['project']['key_password'],
    'key.deserializer': string_deserializer,
    'value.deserializer': string_deserializer,
    'group.id': conf['kafka']['consumer']['group_id'],
    'auto.offset.reset': conf['kafka']['consumer']['auto_offset_reset'],
}
consumer = DeserializingConsumer(consumer_conf)
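The snippet stops after creating the consumer, even though its header comment says it controls a light through the Home Assistant REST API. A hedged continuation sketch: the topic name, the `conf['hass']` keys, the message format ("on"/"off"), and the `light.example_light` entity are all assumptions; `/api/services/light/turn_on|turn_off` is the standard Home Assistant service route:

# Hypothetical continuation: topic, conf['hass'] keys and entity_id are placeholders.
consumer.subscribe([conf['kafka']['topic']])

headers = {'Authorization': 'Bearer ' + conf['hass']['token'],
           'Content-Type': 'application/json'}

try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None or msg.error():
            continue
        command = msg.value()                        # assumed to be "on" or "off"
        service = 'turn_on' if command == 'on' else 'turn_off'
        requests.post(conf['hass']['url'] + '/api/services/light/' + service,
                      headers=headers,
                      data=json.dumps({'entity_id': 'light.example_light'}))
except KeyboardInterrupt:
    pass
finally:
    consumer.close()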