def extract_time(timestamp):
    # recursively divide by 1000 until the time is in seconds and not in ms, µs or ns
    if timestamp >= 1e11:
        timestamp /= 1000.0
        return extract_time(timestamp)
    return datetime.utcfromtimestamp(timestamp).replace(
        tzinfo=pytz.UTC).isoformat()


if __name__ == "__main__":
    # Create the kafka consumer instance and subscribe to the topics
    kafka_consumer = Consumer({
        'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
        'group.id': 'db-connector',
        'auto.offset.reset': 'earliest'
    })
    kafka_consumer.subscribe([KAFKA_TOPIC])

    joined_records = list()
    cnt = 0
    # timestamp = 0
    st0 = None
    try:
        while True:
            msg = kafka_consumer.poll(0.1)
            # if there is no msg within the poll timeout, continue
            if msg is None:
                continue
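# A quick, hedged illustration of extract_time (not part of the original
# script): the same instant expressed in seconds, milliseconds, microseconds
# and nanoseconds should all normalize to the same ISO-8601 string. Assumes
# the datetime/pytz imports used by extract_time are in scope.
for ts in (1600000000, 1600000000000, 1600000000000000, 1600000000000000000):
    print(extract_time(ts))  # '2020-09-13T12:26:40+00:00' in every case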
def main():
    logging.info("Pilot started for {}".format(DRONE_ID))

    global current_angle
    global KILL_ALL

    set_homebase()  # reset drone position in the positions table

    drone_number = int(DRONE_ID.split('_')[1])
    if settings.DRONE_MODE == "live":
        drone = tellopy.Tello()
    else:
        drone = tellopy.Tello(port=9000 + drone_number)

    if DRONE_MODE == "live":
        drone.subscribe(drone.EVENT_FLIGHT_DATA, handler)
        drone.connect()
        drone.wait_for_connection(600)
        dronedata_table.update(
            _id=DRONE_ID,
            mutation={"$put": {'connection_status': "connected"}})

    # create video thread
    if DRONE_MODE == "video":
        videoThread = threading.Thread(target=play_video_from_file)
    elif DRONE_MODE == "live":
        videoThread = threading.Thread(target=get_drone_video, args=[drone])
    videoThread.start()

    livePilotThread = threading.Thread(target=interactive_control, args=[drone])
    livePilotThread.start()

    start_time = time.time()
    consumer_group = DRONE_ID + str(time.time())
    positions_consumer = Consumer({
        'group.id': consumer_group,
        'default.topic.config': {'auto.offset.reset': 'latest'}
    })
    positions_consumer.subscribe([POSITIONS_STREAM + ":" + DRONE_ID])

    while True:
        try:
            logging.info("polling")
            msg = positions_consumer.poll(timeout=1)
            if msg is None:
                continue

            # Process moving instructions
            if not msg.error():
                json_msg = json.loads(msg.value().decode('utf-8'))
                logging.info("new message : {}".format(json_msg))

                if "action" in json_msg:
                    action = json_msg["action"]

                    if action == "takeoff":
                        logging.info(".................................................... Takeoff")
                        mutation = {"$put": {"status": "busy"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        mutation = {"$put": {"last_command": "takeoff"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        if DRONE_MODE == "live" and not NO_FLIGHT:
                            drone.takeoff()
                            time.sleep(8)
                            drone.up(1)
                        mutation = {"$put": {"position.status": "flying"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        mutation = {"$put": {"status": "waiting"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)

                    if action == "land":
                        logging.info(".................................................... Land")
                        mutation = {"$put": {"status": "busy"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        mutation = {"$put": {"last_command": "land"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        if DRONE_MODE == "live" and not NO_FLIGHT:
                            drone.land()
                        mutation = {"$put": {"position.status": "landed"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        mutation = {"$put": {"status": "waiting"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)

                else:
                    logging.info("..................................................... Moving")
                    from_zone = dronedata_table.find_by_id(DRONE_ID)["position"]["zone"]
                    drop_zone = json_msg["drop_zone"]
                    if drop_zone != from_zone:
                        mutation = {"$put": {"status": "busy"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        if DRONE_MODE == "live" and not NO_FLIGHT:
                            move_to_zone(drone, from_zone, drop_zone)
                            trigger = 0
                            while not drone.ready:  # waits up to 3 seconds for the drone to stabilize
                                time.sleep(0.1)
                                trigger += 1
                                if trigger > 30:
                                    break
                        logging.info("{} ready".format(DRONE_ID))
                        mutation = {"$put": {"position.zone": drop_zone}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)
                        mutation = {"$put": {"status": "waiting"}}
                        dronedata_table.update(_id=DRONE_ID, mutation=mutation)

            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.info(msg.error())

        except KeyboardInterrupt:
            drone.land()
            break
        except Exception:
            logging.exception("land failed")
            drone.land()
            break

        time.sleep(1)

    logging.info("QUITTING")
    KILL_ALL = True
    drone.killall = True
    logging.info("Exiting threads ... ")
    time.sleep(5)
    logging.info("threads killed.")
    sys.exit()
from confluent_kafka import Consumer, KafkaError
import time

c = Consumer({
    'bootstrap.servers': 'kafka-master:9092',
    'group.id': 'mygroup',
    'default.topic.config': {'auto.offset.reset': 'smallest'}
})
c.subscribe(['stream-sim'])

with open("out", "w") as f:
    while True:
        msg = c.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                continue
            else:
                print(msg.error())
                break
        values = msg.value().decode('utf-8')
        print('Received message: {}'.format(values))
        f.seek(0, 0)   # return to the beginning of the file
        f.write(values)
        f.truncate()   # remove everything after the last write
        time.sleep(1)

c.close()
from time import time

from confluent_kafka import Consumer
from elasticsearch import Elasticsearch

K_SERVER = "localhost:9092"
K_SUB_TOPIC = "glove-hash-vectors"

settings = {
    'bootstrap.servers': K_SERVER,
    'group.id': 'TODO',
    'client.id': 'client-%d' % time(),
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {'auto.offset.reset': 'smallest'}
}

consumer = Consumer(settings)
consumer.subscribe([K_SUB_TOPIC])

es = Elasticsearch()
actions = []

while True:
    # TODO: is it really best practice to poll like this?
    msg = consumer.poll(0.1)
    if msg is None:
        continue
    if msg.error():
        print('Error: %s' % msg.error().str())
        continue
def __init__(self,
             *addresses,
             handle,
             topic_name: str,
             server_name: str = None,
             num_partitions: int = 64,
             replication_factor: int = 1,
             max_polling_timeout: float = 0.001,
             concurrent=False,
             **kwargs):
    """
    Init Kafka RPCServer.

    Multiple KRPCServers can be instantiated to balance the load. If any
    server is down, the other KRPCServers will automatically take its place.

    Args:
        addresses: kafka broker host:port, for example: '192.168.1.117:9092'
        handle: any object
        topic_name: kafka topic name; if the topic exists, the existing topic
            will be used, otherwise a new topic is created.
        server_name: krpc server name; if None, the ip is used instead.
        num_partitions: kafka topic num_partitions
        replication_factor: kafka topic replication_factor, i.e. backup count
            on other brokers. The larger replication_factor is, the slower but
            safer.
        max_polling_timeout: maximum time (seconds) to block waiting for a
            message, event or callback.
        encrypt: default None; if not None, the message is encrypted with the
            given password. It slows down performance.
        verify: default False; if True, the message is verified against the
            sha3 checksum from the headers.
        ack: default False; if True, the server confirms the message status.
            Disabling ack doubles the speed, but is not exactly safe.
        concurrent: default False; if False, the handle works in a local
            thread. If concurrent is an integer K, KRPCServer generates a pool
            of K threads, so the handle works in multiple threads. Be aware
            that to benefit from concurrency, KRPCClient should run in async
            mode as well. If concurrency fails, the handle itself might not
            support multithreading.
        use_gevent: default True; if True, use gevent instead of asyncio. If
            the gevent version is lower than 1.5, krpc will not run on Windows.
        compression: default 'none'; see compression.codec in
            https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md.
            'zstd' is bugged, see
            https://github.com/confluentinc/confluent-kafka-python/issues/589.
        use_compression: default False; custom compression using zstd.
    """
    bootstrap_servers = ','.join(addresses)
    kc = KafkaControl(bootstrap_servers)

    self.topic_name = topic_name
    self.server_topic = 'krpc_{}_server'.format(topic_name)
    self.client_topic = 'krpc_{}_client'.format(topic_name)

    # create a topic that receives requests from the client
    kc.create_topics(self.server_topic,
                     num_partitions=num_partitions,
                     replication_factor=replication_factor)
    # create a topic that sends responses to the client
    kc.create_topics(self.client_topic,
                     num_partitions=num_partitions,
                     replication_factor=replication_factor)

    # set handle
    self.handle = handle

    # set server_name
    if server_name is None:
        self.server_name = get_ip()
    else:
        self.server_name = server_name + '_' + get_ip()

    # set max_polling_timeout
    assert max_polling_timeout > 0, 'max_polling_timeout must be greater than 0'
    self.max_polling_timeout = max_polling_timeout

    self.consumer = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'krpc',
        'auto.offset.reset': 'earliest',
        'auto.commit.interval.ms': 1000,
        'compression.codec': kwargs.get('compression', 'none')
    })

    message_max_bytes = kwargs.get('message_max_bytes', 1048576)
    queue_buffering_max_kbytes = kwargs.get('queue_buffering_max_kbytes', 1048576)
    queue_buffering_max_messages = kwargs.get('queue_buffering_max_messages', 100000)

    if message_max_bytes > 1048576:
        logger.warning('message_max_bytes is greater than 1048576; '
                       'message.max.bytes and replica.fetch.max.bytes of the '
                       "brokers' config should be greater than this")

    self.producer = Producer({
        'bootstrap.servers': bootstrap_servers,
        'on_delivery': self.delivery_report,
        # custom parameters
        'message.max.bytes': message_max_bytes,
        'queue.buffering.max.kbytes': queue_buffering_max_kbytes,
        'queue.buffering.max.messages': queue_buffering_max_messages,
        'compression.codec': kwargs.get('compression', 'none')
    })

    self.consumer.subscribe([self.server_topic])

    # custom callbacks, not implemented yet
    # self.callback_before_call = kwargs.get('callback_before_rpc', None)
    # self.callback_after_call = kwargs.get('callback_after_rpc', None)

    # set msgpack packer & unpacker; stop using a global packer or unpacker,
    # to ensure thread safety.
    # self.packer = msgpack.Packer(use_bin_type=True)
    self.unpacker = msgpack.Unpacker(use_list=False, raw=False)

    # set status indicator
    self.is_available = True

    self.verify = kwargs.get('verify', False)
    self.verification_method = kwargs.get('verification', 'crc32')
    if self.verification_method == 'crc32':
        self.verification_method = lambda x: hex(zlib.crc32(x)).encode()
    elif isinstance(self.verification_method, Callable):
        self.verification_method = self.verification_method
    else:
        raise AssertionError('not supported verification function.')

    self.encrypt = kwargs.get('encrypt', None)
    if self.encrypt is not None:
        self.encrypt = AESEncryption(self.encrypt, encrypt_length=16)

    self.use_compression = kwargs.get('use_compression', False)

    self.is_closed = False

    # acknowledge; disabling ack doubles the speed, but is not exactly safe.
    self.ack = kwargs.get('ack', False)

    # concurrency
    if isinstance(concurrent, int) and concurrent is not False:
        assert concurrent > 1, 'if concurrency is enabled, concurrent must be an integer greater than 1'
        use_gevent = kwargs.get('use_gevent', True)
        if use_gevent:
            from gevent.threadpool import ThreadPoolExecutor as gThreadPoolExecutor
            self.thread_pool = gThreadPoolExecutor(concurrent)
        else:
            self.thread_pool = ThreadPoolExecutor(concurrent)
    else:
        self.thread_pool = None
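# Hedged usage sketch, not taken from the original project: the import path
# `kafka_rpc` and the Calculator handle are illustrative assumptions; only the
# constructor arguments shown are the ones documented in the docstring above.
from kafka_rpc import KRPCServer  # assumed module path

class Calculator:
    # per the docstring, the handle can be any object
    def add(self, x, y):
        return x + y

server = KRPCServer(
    '192.168.1.117:9092',        # broker address, as in the docstring example
    handle=Calculator(),
    topic_name='calc',           # reuses or creates krpc_calc_server / krpc_calc_client
    num_partitions=8,
    replication_factor=1,
    ack=True,                    # server confirms message status (slower, safer)
)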
from confluent_kafka import Consumer, KafkaError
import json

from pyfcm import FCMNotification
from config.config import config
from common.kafka import Kafka
from bson import json_util

c = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'notify_push',
    'auto.offset.reset': 'latest'
})
c.subscribe(['notify_push'])

while True:
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue

    received_message = json.loads(msg.value().decode('utf-8'))
    print('Received message:', json.dumps(received_message))

    apikeys = config.APPID
    if received_message["appid"] in apikeys and apikeys[received_message["appid"]]:
import logging
import os
import signal
import sys

from confluent_kafka import Consumer, KafkaError

logger = logging.getLogger('kafka_consumer')


def handler_stop_signals(signum, frame):
    logger.info('Received SIGTERM/SIGINT, closing program')
    sys.exit()


signal.signal(signal.SIGINT, handler_stop_signals)
signal.signal(signal.SIGTERM, handler_stop_signals)

server = os.environ.get('KAFKA_SERVER') or 'kafka:9092'
topic = os.environ.get('KAFKA_TOPIC') or 'actions'
consumer_group = os.environ.get('KAFKA_CONSUMER_GROUP') or 'actions'

consumer = Consumer({
    'bootstrap.servers': server,
    'group.id': consumer_group,
    'enable.auto.commit': True
})
consumer.subscribe([topic])

while True:
    msg = consumer.poll(timeout=5.0)
    if msg is None:
        logger.info('Nothing received...')
        continue
    if msg.error():
        # Error or event
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            logger.error(f' {msg.topic()} [{msg.partition()}] reached end of offset {msg.offset()}')
def test_consumer_start_from_committed_offset(requires_kafka):
    consumer_group = f"consumer-{uuid.uuid1().hex}"
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, f"{i}".encode("utf8"))

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        Consumer(
            {"bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"], "group.id": consumer_group}
        ).commit(message=messages_delivered[topic][0], asynchronous=False)

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in range(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received) == 1, "expected to receive partition assignment"
        assert {(i.topic, i.partition) for i in assignments_received[0]} == {(topic, 0)}

        # TODO: Make sure that all partitions are paused on assignment.

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key=f"{message.topic()}:{message.partition()}:{synchronize_commit_group}".encode(
                "utf8"
            ),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0 + 1]  # second message
        producer.produce(
            commit_log_topic,
            key=f"{message.topic()}:{message.partition()}:{synchronize_commit_group}".encode(
                "utf8"
            ),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?)
        for i in range(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = messages_delivered[topic][0 + 1]  # second message
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
def test_consumer_rebalance_from_committed_offset(requires_kafka):
    consumer_group = f"consumer-{uuid.uuid1().hex}"
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic, f"{i}".encode("utf8"), partition=i % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        Consumer(
            {"bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"], "group.id": consumer_group}
        ).commit(
            offsets=[
                TopicPartition(message.topic(), message.partition(), message.offset() + 1)
                for message in messages_delivered[topic][:2]
            ],
            asynchronous=False,
        )

        consumer_a = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for i in range(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert (
            len(assignments_received[consumer_a]) == 1
        ), "expected to receive partition assignment"
        assert {(i.topic, i.partition) for i in assignments_received[consumer_a][0]} == {
            (topic, 0),
            (topic, 1),
        }

        assignments_received[consumer_a].pop()

        consumer_b = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for i in range(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert (
                len(assignments_received[consumer]) == 1
            ), "expected to receive partition assignment"
            assert (
                len(assignments_received[consumer][0]) == 1
            ), "expected to have a single partition assignment"

            i = assignments_received[consumer][0][0]
            assignments[(i.topic, i.partition)] = consumer

        assert set(assignments.keys()) == {(topic, 0), (topic, 1)}

        for expected_message in messages_delivered[topic][2:]:
            consumer = assignments[(expected_message.topic(), expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            producer.produce(
                commit_log_topic,
                key=f"{expected_message.topic()}:{expected_message.partition()}:{synchronize_commit_group}".encode(
                    "utf8"
                ),
                value="{}".format(expected_message.offset() + 1).encode("utf8"),
            )

            assert producer.flush(5) == 0, "producer did not successfully flush queue"

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?)
            for i in range(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, "no message received"

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
import io
import struct

from avro.io import BinaryDecoder, DatumReader
from confluent_kafka import Consumer
from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient
from confluent_kafka.avro.serializer import SerializerError

# Please adjust your server and url

# KAFKA BROKER URL
consumer = Consumer({
    'bootstrap.servers': '192.168.25.163:19092',
    'group.id': 'abcd'
})

# SCHEMA URL
register_client = CachedSchemaRegistryClient(url="http://192.168.25.163:7070")
consumer.subscribe(['job_entity'])

MAGIC_BYTES = 0


def unpack(payload):
    magic, schema_id = struct.unpack('>bi', payload[:5])
    # Get Schema registry
    # Avro value format
    if magic == MAGIC_BYTES:
        schema = register_client.get_by_id(schema_id)
        reader = DatumReader(schema)
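# Hedged sketch, not from the original snippet: one common way to finish
# decoding the Confluent Avro wire format (1 magic byte + 4-byte big-endian
# schema id + Avro body) once the writer schema has been fetched. It reuses
# register_client, MAGIC_BYTES and the imports defined above.
def decode_avro_value(payload):
    magic, schema_id = struct.unpack('>bi', payload[:5])
    if magic != MAGIC_BYTES:
        raise ValueError("message does not use the Confluent Avro wire format")
    schema = register_client.get_by_id(schema_id)
    reader = DatumReader(schema)
    # the Avro-encoded body starts right after the 5-byte header
    return reader.read(BinaryDecoder(io.BytesIO(payload[5:])))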
from pprint import pformat
import os
import sys

from confluent_kafka import Consumer, KafkaException

sys.path.insert(0, './provisioners/')
import initialize_helper as helper


def print_assignment(consumer, partitions):
    print('Assignment:', partitions)


# Get address of Kafka
infra_data = helper.get_cloud_info('infra_values.json')
instance_kafka = infra_data['kafka_instance']

conf = {
    'bootstrap.servers': str(instance_kafka) + ":9092",
    'group.id': '1',
    'session.timeout.ms': 6000,
    'auto.offset.reset': 'earliest'
}

c = Consumer(conf)
c.subscribe(['data-mag'], on_assign=print_assignment)

try:
    while True:
        msg = c.poll(timeout=4)
        if msg is None:
            continue
        if msg.error():
            print('error happened...')
            raise KafkaException(msg.error())
        else:
            sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                             (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            print(msg.value())
except KeyboardInterrupt:
    sys.stderr.write('%% Aborted by user\n')
def read_data(self):
    consumer = Consumer(self.config)
    consumer.subscribe(self.topic)
    self.run(consumer, 0, [], [])
incrementCount()

# Unsecure system connection
# connection_str = "{}:5678?auth=basic;user={};password={};ssl=false".format(host, username, password)
connection_str = ("{}:5678?auth=basic;user={};password={};ssl=true;"
                  "sslCA=/opt/mapr/conf/ssl_truststore.pem;"
                  "sslTargetNameOverride={}".format(host, username, password, host))
connection = ConnectionFactory.get_connection(connection_str=connection_str)

# Get a store and assign it as a DocumentStore object
if connection.is_store_exists(store_path=tbl_path):
    document_store = connection.get_store(store_path=tbl_path)
else:
    document_store = connection.create_store(store_path=tbl_path)

# Create the Kafka Consumer
c = Consumer({
    'group.id': 'mygroup',
    'enable.partition.eof': 'false',
    'default.topic.config': {'auto.offset.reset': 'earliest'}
})
c.subscribe(['/demos/hl7demo/hl7stream:adt_topic'])

# Wait for new messages to be produced to the stream
running = True
while running:
    msg = c.poll(timeout=1.0)
    if msg is None:
        print("No messages on queue...sleeping")
        continue
    else:
        print(msg)
        if not msg.error():
            msg_json = json.loads(msg.value())['msh']
def init_kafka_source(self, **kwargs):
    from confluent_kafka import Consumer

    params = {}
    for parsed_url in kwargs['input']:
        url_params = parse_qs(parsed_url.query)
        for key, val in url_params.items():
            params.setdefault(key, []).extend(val)

    bootstrap_servers = params['bootstrap_servers']
    list_bootstrap_servers = aslist(bootstrap_servers[0].replace(',', ' '))
    if len(list_bootstrap_servers) > 1:
        bootstrap_servers = list_bootstrap_servers
    else:
        bootstrap_servers = params['bootstrap_servers']

    offset_reset = params.get('offset_reset')
    if offset_reset:
        offset_reset = offset_reset[-1]
    else:
        offset_reset = 'largest'

    strategy = params.get('partition_strategy')
    if strategy:
        strategy = strategy[-1]
    else:
        strategy = 'roundrobin'

    return RDKafkaSource(
        self.logger,
        self.loop,
        kwargs['gate'],
        Consumer({
            'api.version.request': True,
            'bootstrap.servers': ','.join(bootstrap_servers),
            # 'debug': 'all',
            'default.topic.config': {
                'auto.offset.reset': offset_reset,
                'enable.auto.commit': True,
                'offset.store.method': 'broker',
                'produce.offset.report': True,
            },
            'enable.partition.eof': False,
            # The lambda is necessary to return control to the main Tornado
            # thread
            'error_cb': lambda err: self.loop.add_callback(self.onKafkaError, err),
            'group.id': params['group_name'][0],
            # See: https://github.com/edenhill/librdkafka/issues/437
            'log.connection.close': False,
            'max.in.flight': kwargs['inflight'],
            'partition.assignment.strategy': strategy,
            'queue.buffering.max.ms': 1000,
        }),
        *[url.netloc for url in kwargs['input']])
def run(self):
    def fail_fast(err, msg):
        if err is not None:
            print("Kafka producer delivery error: {}".format(err))
            print("Bailing out...")
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)

    def on_commit(err, partitions):
        if err is not None:
            print("Kafka consumer commit error: {}".format(err))
            print("Bailing out...")
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)
        for p in partitions:
            # check for partition-specific commit errors
            print(p)
            if p.error:
                print("Kafka consumer commit error: {}".format(p.error))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(p.error)
        print("Kafka consumer commit successful")
        pass

    def on_rebalance(consumer, partitions):
        for p in partitions:
            if p.error:
                raise KafkaException(p.error)
        print("Kafka partitions rebalanced: {} / {}".format(consumer, partitions))

    consumer_conf = self.kafka_config.copy()
    consumer_conf.update({
        'group.id': self.consumer_group,
        'on_commit': fail_fast,
        # messages don't have offset marked as stored until pushed to
        # elastic, but we do auto-commit stored offsets to broker
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        # user code timeout; if no poll after this long, assume user code
        # hung and rebalance (default: 5min)
        'max.poll.interval.ms': 180000,
        'default.topic.config': {
            'auto.offset.reset': 'latest',
        },
    })
    consumer = Consumer(consumer_conf)

    producer_conf = self.kafka_config.copy()
    producer_conf.update({
        'delivery.report.only.error': True,
        'default.topic.config': {
            'request.required.acks': -1,  # all brokers must confirm
        },
    })
    producer = Producer(producer_conf)

    consumer.subscribe(
        [self.consume_topic],
        on_assign=on_rebalance,
        on_revoke=on_rebalance,
    )
    print("Kafka consuming {}".format(self.consume_topic))

    while True:
        msg = consumer.poll(self.poll_interval)
        if not msg:
            print("nothing new from kafka (poll_interval: {} sec)".format(self.poll_interval))
            continue
        if msg.error():
            raise KafkaException(msg.error())

        cle = json.loads(msg.value().decode('utf-8'))
        # print(cle)
        print("processing changelog index {}".format(cle['index']))

        release_ids = []
        new_release_ids = []
        file_ids = []
        container_ids = []
        work_ids = []

        release_edits = cle['editgroup']['edits']['releases']
        for re in release_edits:
            release_ids.append(re['ident'])
            # filter to direct release edits which are not updates
            if not re.get('prev_revision') and not re.get('redirect_ident'):
                new_release_ids.append(re['ident'])
        file_edits = cle['editgroup']['edits']['files']
        for e in file_edits:
            file_ids.append(e['ident'])
        container_edits = cle['editgroup']['edits']['containers']
        for e in container_edits:
            container_ids.append(e['ident'])
        work_edits = cle['editgroup']['edits']['works']
        for e in work_edits:
            work_ids.append(e['ident'])

        # TODO: do these fetches in parallel using a thread pool?
        for ident in set(file_ids):
            file_entity = self.api.get_file(ident, expand=None)
            # update release when a file changes
            # TODO: fetch old revision as well, and only update
            # releases for which list changed
            release_ids.extend(file_entity.release_ids or [])
            file_dict = self.api.api_client.sanitize_for_serialization(file_entity)
            producer.produce(
                self.file_topic,
                json.dumps(file_dict).encode('utf-8'),
                key=ident.encode('utf-8'),
                on_delivery=fail_fast,
            )

        for ident in set(container_ids):
            container = self.api.get_container(ident)
            container_dict = self.api.api_client.sanitize_for_serialization(container)
            producer.produce(
                self.container_topic,
                json.dumps(container_dict).encode('utf-8'),
                key=ident.encode('utf-8'),
                on_delivery=fail_fast,
            )

        for ident in set(release_ids):
            release = self.api.get_release(
                ident, expand="files,filesets,webcaptures,container")
            work_ids.append(release.work_id)
            release_dict = self.api.api_client.sanitize_for_serialization(release)
            producer.produce(
                self.release_topic,
                json.dumps(release_dict).encode('utf-8'),
                key=ident.encode('utf-8'),
                on_delivery=fail_fast,
            )
            # filter to "new" active releases with no matched files
            if release.ident in new_release_ids:
                ir = release_ingest_request(
                    release, ingest_request_source='fatcat-changelog')
                if ir and not release.files and self.want_live_ingest(release, ir):
                    producer.produce(
                        self.ingest_file_request_topic,
                        json.dumps(ir).encode('utf-8'),
                        # key=None,
                        on_delivery=fail_fast,
                    )

        producer.flush()
        # TODO: publish updated 'work' entities to a topic
        consumer.store_offsets(message=msg)
import logging

from confluent_kafka import Consumer, KafkaException

# Create logger for consumer (logs will be emitted when poll() is called)
logger = logging.getLogger("consumer")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter("%(asctime)-15s %(levelname)-8s %(message)s"))
logger.addHandler(handler)

consumer = Consumer(
    {
        "bootstrap.servers": "localhost:9092",
        "group.id": "atlas-android-data-processor",
        "session.timeout.ms": 6000,
        # 'auto.offset.reset': 'earliest'
    },
    logger=logger,
)

output_topic = "data_warehouse"


def print_assignment(consumer, partitions):
    print("Assignment:", partitions)


consumer.subscribe([output_topic], on_assign=print_assignment)

try:
def test_basic_api():
    """ Basic API tests; these won't really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
"Python Consumer" from confluent_kafka import Consumer KAFKA_TOPIC = "driver-positions" print("Starting Python Consumer.") # Configure the group id, location of the bootstrap server, # Confluent interceptors consumer = Consumer({ 'bootstrap.servers': 'kafka:9092', 'plugin.library.paths': 'monitoring-interceptor', 'group.id': 'python-consumer', 'auto.offset.reset': 'earliest' }) # Subscribe to our topic consumer.subscribe([KAFKA_TOPIC]) try: while True: #TODO: Poll for available records msg = consumer.poll(1.0) if msg is None: continue if msg.error(): print("Consumer error: {}".format(msg.error())) continue #TODO: print the contents of the record
# size set in the broker. The lower of the two will set the limit.
# If a message exceeds this maximum size, an error should be reported by
# the software publishing the run start message (for example NICOS).
config = {
    "bootstrap.servers": kafka_broker,
    "group.id": "consumer_group_name",
    "auto.offset.reset": "latest",
    "enable.auto.commit": False,
    "message.max.bytes": 100_000_000,
    "fetch.message.max.bytes": 100_000_000,
    "enable.partition.eof": True,  # used by consumer stop logic
}
if consumer_type_enum == ConsumerType.REAL:
    consumers = [
        KafkaConsumer(topic_partition, Consumer(config), callback, stop_time_ms)
        for topic_partition in topic_partitions
    ]
else:
    consumers = [
        KafkaConsumer(TopicPartition(""), FakeConsumer(test_message_queue),
                      callback, stop_time_ms)
    ]
return consumers


def start_consumers(consumers: List[KafkaConsumer]):
    for consumer in consumers:
        consumer.start()
from confluent_kafka import Consumer, KafkaError
from datetime import datetime
import json, sqlite3

from email_notif import email_notification
from message_notif import message_notification
from alarm_notif import alarm_notification

print("Notification Manager Service up and running..")

c = Consumer({
    'bootstrap.servers': "localhost:9092",
    'group.id': '1',
    'auto.offset.reset': 'latest'
})
c.subscribe(['notify'])

while True:
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        continue

    # print('Received message: {}'.format(msg.value().decode('utf-8')))
    _request_ = json.loads(msg.value().decode('utf-8'))

    connection = sqlite3.connect("db.sqlite3")
    crsr = connection.cursor()
    try:
        task = (_request_['username'], _request_['phone_number'],
                _request_['email'], _request_['firstname'],
                _request_['app_name'], _request_['service'],
sys.stderr.write("-T option value needs to be larger than zero: %s\n" % opt[1])
sys.exit(1)

conf['stats_cb'] = stats_cb
conf['statistics.interval.ms'] = int(opt[1])

# Create logger for consumer (logs will be emitted when poll() is called)
logger = logging.getLogger('consumer')
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
logger.addHandler(handler)

# Create Consumer instance
# Hint: try debug='fetch' to generate some log messages
c = Consumer(conf, logger=logger)


def print_assignment(consumer, partitions):
    print('Assignment:', partitions)


# Subscribe to topics
c.subscribe(topics, on_assign=print_assignment)

# Read messages from Kafka, print to stdout
try:
    while True:
        msg = c.poll(timeout=100.0)
        if msg is None:
            continue
        if msg.error():
            # Error or event
from confluent_kafka import Consumer, KafkaError, TopicPartition

c = Consumer({
    'bootstrap.servers': '192.168.33.6:9092',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

# tp = TopicPartition("mytopic", 1, 0)
# c.assign([tp])
# c.seek(tp)
c.subscribe(['mytopic'])

while True:
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
    print('Received message: {} - {} - {}'.format(
        msg.value().decode('utf-8'), msg.topic(), msg.partition()))

c.close()

if __name__ == '__main__':
    pass
if __name__ == '__main__':
    # Step 1. Configure the connection to the Kafka cluster
    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    props = {
        'bootstrap.servers': 'localhost:9092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'STUDENT_ID',               # Consumer group name (replace with your student ID)
        'auto.offset.reset': 'latest',          # Offset reset policy (here: latest)
        'enable.auto.commit': False,
        'session.timeout.ms': 6000,             # The consumer is considered dead if it has no contact with Kafka for more than 6000 ms
        'error_cb': error_cb                    # Callback function for receiving error messages
    }

    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the topic name to subscribe to
    topicName = "project"
    # Step 4. Subscribe the consumer to that topic on the Kafka cluster
    consumer.subscribe([topicName], on_assign=my_assign)

    # Step 5. Continuously pull incoming messages from Kafka
    count = 0

    host = "localhost"
    port = 3306
    user = ''
    passwd = ''
    db = 'toyota'
    charset = 'utf8'
    conn = pymysql.connect(host=host, port=port, user=user, passwd=passwd,
                           db=db, charset=charset)
    cursor = conn.cursor()
import base64
import json
import os

import cv2
import numpy as np
from confluent_kafka import Producer, Consumer, KafkaError

os.environ['LD_LIBRARY_PATH'] = "$LD_LIBRARY_PATH:/opt/mapr/lib"

cascPath = "haarcascade_frontalface_default.xml"

# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)

"""
Removing the original read from file - replacing with a Streams consumer
# Read the image
image = cv2.imread(imagePath)
"""

c = Consumer({
    'group.id': 'mygroup',
    'default.topic.config': {'auto.offset.reset': 'earliest'}
})
c.subscribe(['/demo-streams/dbchanges:topic1'])

running = True
while running:
    msg = c.poll(timeout=1.0)
    if msg is None:
        continue
    if not msg.error():
        # Replace the simple receiver with the streams consumer
        # Get the message and pull off the image field
        # Load as a json document, retrieve image element and decode from base64
        nparr = np.fromstring(
            base64.b64decode(json.loads(msg.value())['$$document']['image']), np.uint8)
        image = cv2.imdecode(nparr, 1)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Detect faces in the image
        faces = faceCascade.detectMultiScale(
from confluent_kafka import Consumer

c = Consumer({
    'bootstrap.servers': 'localhost:9092,localhost:9192,localhost:9292',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})
c.subscribe(['streams-sum-output'])

print("Start consuming...")

while True:
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue

    value = msg.value().decode('utf-8')
    '''if value is None:
        value = -1
    else:
        value = msg.value()[-1]'''
    kvalue = msg.key().decode('utf-8')
    print('Received message: {0} , {1}'.format(kvalue, value))

c.close()
def __init__(self, conf, topic_name):
    self.consumer = Consumer(conf)
    self.topic_name = topic_name
    self.running = True
def create_consumer(self,
                    group_id=None,
                    server="127.0.0.1",
                    port="9092",
                    enable_auto_commit=True,
                    auto_offset_reset="latest",
                    schema_registry_url=None,
                    auto_create_topics=True,
                    key_deserializer=None,
                    value_deserializer=None,
                    legacy=True,
                    **kwargs):
    """Create a Kafka Consumer and return its `group_id` as a string.

    Keyword Arguments:
    - ``server`` (str): IP address / domain that the consumer should contact
      to bootstrap initial cluster metadata. Default: `127.0.0.1`.
    - ``port`` (int): Port number. Default: `9092`.
    - ``group_id`` (str, or uuid.uuid4() if not set): name of the consumer
      group to join for dynamic partition assignment (if enabled), and to use
      for fetching and committing offsets. If None, a unique string is
      generated (via uuid.uuid4()) and offset commits are disabled.
      Default: `None`.
    - ``auto_offset_reset`` (str): A policy for resetting offsets on
      OffsetOutOfRange errors: `earliest` will move to the oldest available
      message, `latest` will move to the most recent. Any other value will
      raise an exception. Default: `latest`.
    - ``enable_auto_commit`` (bool): If true, the consumer's offset will be
      periodically committed in the background. Default: `True`.
    - ``schema_registry_url`` (str): *required* for the Avro Consumer. Full
      URL to the avro schema endpoint.
    - ``auto_create_topics`` (bool): Consumers no longer trigger auto creation
      of topics; will be removed in a future release. Default: `True`.
    - ``legacy`` (bool): Activate SerializingConsumer if 'False', otherwise
      AvroConsumer (legacy) is used. Will be removed when confluent-kafka
      deprecates this. Default: `True`.

    Note: Configuration parameters are described in more detail at
    https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    """
    if group_id is None:
        group_id = str(uuid.uuid4())

    if schema_registry_url and legacy == True:
        consumer = AvroConsumer({
            'bootstrap.servers': '{}:{}'.format(server, port),
            'group.id': group_id,
            'enable.auto.commit': enable_auto_commit,
            'allow.auto.create.topics': auto_create_topics,
            'auto.offset.reset': auto_offset_reset,
            'schema.registry.url': schema_registry_url,
            **kwargs
        })
    elif legacy == False:
        consumer = DeserializingConsumer({
            'bootstrap.servers': '{}:{}'.format(server, port),
            'group.id': group_id,
            'enable.auto.commit': enable_auto_commit,
            'auto.offset.reset': auto_offset_reset,
            'key.deserializer': key_deserializer,
            'value.deserializer': value_deserializer,
            **kwargs
        })
    else:
        consumer = Consumer({
            'bootstrap.servers': '{}:{}'.format(server, port),
            'group.id': group_id,
            'enable.auto.commit': enable_auto_commit,
            'allow.auto.create.topics': auto_create_topics,
            'auto.offset.reset': auto_offset_reset,
            **kwargs
        })

    self.consumers[group_id] = consumer
    return group_id
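# Hedged usage sketch, not from the original source: it assumes this method
# lives on a keyword-library class (called ConsumerLibrary here purely for
# illustration) that stores created consumers in self.consumers by group id,
# as the last lines of create_consumer above suggest.
lib = ConsumerLibrary()
group_id = lib.create_consumer(
    server="127.0.0.1",
    port="9092",
    auto_offset_reset="earliest",   # start from the oldest available message
    enable_auto_commit=False,
)
# the underlying confluent_kafka Consumer is kept in the library's registry
lib.consumers[group_id].subscribe(["my-topic"])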
from confluent_kafka import Consumer, KafkaError

c = Consumer({
    'bootstrap.servers': 'b-2.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092,'
                         'b-1.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092,'
                         'b-3.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092',
    'group.id': 'mygroup',
    'auto.offset.reset': 'latest'
})
c.subscribe(['sampleTopic'])

while True:
    msg = c.poll(0.1)
    if msg is None:
        print("No Data")
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
    print('Received message: {}'.format(msg.value().decode('utf-8')))

c.close()
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should throw a RuntimeError """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert 'Consumer closed' == str(ex.value)
from confluent_kafka import Consumer

# get saved keys
import generalconfig as cfg

confluentKey = cfg.pwd['confluentKey']
confluentSecret = cfg.pwd['confluentSecret']

c = Consumer({
    'bootstrap.servers': "pkc-41973.westus2.azure.confluent.cloud:9092",
    'security.protocol': 'SASL_SSL',
    'sasl.mechanism': 'PLAIN',
    'sasl.username': confluentKey,
    'sasl.password': confluentSecret,
    'group.id': 'newgroup2',
})
c.subscribe(['StreamResults'])

while True:
    msg = c.poll(200)
    # poll() returns None on timeout; guard before dereferencing the message
    if msg is None:
        continue
    if msg.error():
        print(f"Consumer error: {msg.error()}")
        continue
    val = msg.value().decode('utf-8')
    key = msg.key()
    print(f'Key: {key} Value: {val}')

c.close()