def consumer_factory_(topic: str) -> Consumer:
    """Create a consumer manually assigned to every partition of *topic*.

    The consumer is built from a fixed group id with auto-commit and
    partition-EOF events disabled, merged with whatever
    ``unittest_config.create_confluent_config()`` returns (those entries win
    on key collisions because they are unpacked last).  Each partition is
    assigned starting at offset 0.  The new consumer is also appended to the
    ``consumers`` collection from the enclosing scope before being returned.
    """
    settings = {
        "group.id": "asdf",
        "enable.auto.commit": False,
        "enable.partition.eof": False,
        **unittest_config.create_confluent_config(),
    }
    consumer = Consumer(settings)

    # Look up the partitions of the topic and pin each of them to offset 0.
    topic_metadata = consumer.list_topics(topic=topic).topics[topic]
    assignment = []
    for partition_id in topic_metadata.partitions:
        assignment.append(
            TopicPartition(topic=topic, partition=partition_id, offset=0)
        )
    consumer.assign(assignment)

    consumers.append(consumer)
    return consumer
def kafka_consume_expected(topic, group='0', timeout=1.0, mfilter=lambda x: True,
                           validator=lambda x: None, after_subscribe=lambda: None):
    """Consume messages from *topic* until a poll times out and return them.

    :param topic: Name of the topic to subscribe to.
    :param group: Consumer group id.
    :param timeout: Seconds to wait in each ``poll``; the first poll that
        returns nothing ends the read loop.
    :param mfilter: Callable taking a message; only messages for which it
        returns a truthy value are validated and collected.
    :param validator: Callable invoked with every message accepted by
        ``mfilter`` before it is collected.
    :param after_subscribe: Callable invoked once after subscribing (and after
        the rebalance sleep), before polling starts.
    :return: List of accepted messages; empty if the topic metadata reports
        an error.

    The consumer is always closed, including on the early-return error path
    and when a callback raises.
    """
    consumer = Consumer({
        'bootstrap.servers': KAFK,
        'group.id': group,
        'auto.offset.reset': 'earliest'  # earliest _committed_ offset
    })
    msgs = []
    try:
        topics = consumer.list_topics(topic)  # promises to create topic
        logging.debug("Topic state: %s", topics.topics)
        if topics.topics[topic].error is not None:
            logging.warning("Error subscribing to topic: %s", topics.topics)
            return msgs

        consumer.subscribe([topic])
        time.sleep(5)  # for kafka to rebalance consumer groups

        after_subscribe()

        logging.debug("Waiting for messages...")
        while True:
            msg = consumer.poll(timeout)
            if msg is None:
                break

            logging.info("Seen message: %r %r", msg.key(), msg.value())

            if msg.error():
                logging.warning("Consumer error: %s", msg.error())
                continue

            if mfilter(msg):
                validator(msg)
                msgs.append(msg)

        consumer.commit()
    finally:
        # Release the consumer on every exit path, not just the happy one.
        consumer.close()

    return msgs
class Kafka(object):
    """Helper that produces to and passively observes Kafka topics.

    One consumer and one producer are created from the same configuration
    against the address resolved from ``target_key``.
    """

    def __init__(self, target_key) -> None:
        """Connect to the broker address resolved from *target_key*.

        ``list_topics`` is called once with a 5 second timeout as a
        connectivity check.
        """
        super().__init__()
        self.address = _address_for_key(target_key)
        kafka_config = {
            'bootstrap.servers': self.address,
            'group.id': "up9-test-group",
            'enable.auto.commit': 'false'  # important for passive observing
        }

        if "ssl://" in self.address.lower():
            kafka_config['security.protocol'] = 'SSL'

        self.consumer = Consumer(kafka_config)
        self.producer = Producer(kafka_config)
        self.watching_topics = []

        self.consumer.list_topics(timeout=5)  # to check for connectivity

    def watch_topics(self, topics: list):
        """Subscribe to *topics*, positioning each partition at its high watermark.

        Only messages produced after the assignment are therefore observed.
        """

        def my_on_assign(consumer, partitions):
            logging.debug("On assign: %r", partitions)
            consumer.assign(partitions)
            for partition in partitions:
                # Only the high watermark is needed: start reading at the end.
                _low, high = consumer.get_watermark_offsets(partition)
                partition.offset = high
                logging.debug("Setting offset: %r", partition)
                consumer.seek(partition)

        self.watching_topics.extend(topics)
        self.consumer.subscribe(topics, on_assign=my_on_assign)
        self.consumer.poll(0.01)  # to trigger partition assignments

    def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
        """Collect messages from the watched topics.

        :param interval: Seconds passed to each ``poll`` and also the overall
            time budget; reading stops after the first poll that returns
            nothing or once the budget has elapsed.
        :param predicate: Callable taking a message; only messages for which
            it returns a truthy value are returned.
        :return: List of matching messages.
        :raises KafkaException: If a polled message carries an error.

        A message that has already been polled is always processed before the
        time budget is checked, so it is never consumed and then discarded
        (which previously made ``interval=0.0`` return nothing).
        """
        logging.debug(
            "Checking messages that appeared on kafka topics: %r",
            self.watching_topics)
        res = []

        start = time.time()
        while True:
            msg = self.consumer.poll(interval)
            if msg is None:
                break  # done reading

            if msg.error():
                raise KafkaException("kafka consumer error: {}".format(
                    msg.error()))

            logging.debug(
                "Potential message: %r",
                (msg.partition(), msg.key(), msg.headers(), msg.value()))
            if predicate(msg):
                res.append(msg)

            # Check the budget only after handling the message in hand.
            if time.time() - start > interval:
                break  # done reading

        # TODO: consumer.close()
        return res

    def assert_seen_message(self, resp, delay=0, predicate=lambda x: True):
        """Assert that at least one matching message appeared on the watched topics.

        :param resp: Passed through to the decorated assertion function.
        :param delay: Forwarded as ``interval`` to ``get_watched_messages``.
        :param predicate: Message filter forwarded to ``get_watched_messages``.
        :return: Whatever the ``recorder.assertion_decorator``-wrapped function
            returns; the undecorated function returns a list of
            ``(topic, key, value, headers)`` tuples.
        :raises AssertionError: If no matching message was seen.
        """

        @recorder.assertion_decorator
        def assert_seen_kafka_message(resp, topics, delay):
            messages = self.get_watched_messages(delay, predicate)
            messages = [(m.topic(), m.key(), m.value(), m.headers())
                        for m in messages]
            if not messages:
                raise AssertionError("No messages on Kafka topic %r" % topics)
            logging.info("Validated the messages have appeared: %s", messages)
            return messages

        return assert_seen_kafka_message(resp, self.watching_topics, delay)

    def put(self, topic, data=None, json=None, headers=None):
        """Produce one message to *topic* and record it as a synthetic PUT.

        :param topic: Topic name; a single leading ``/`` is stripped.
        :param data: Message payload.
        :param json: Object serialized with ``json_lib.dumps`` and used as the
            payload when ``data`` is None.
        :param headers: Message headers; an empty list is sent when None.
        :return: The synthetic ``requests.Response`` (status 202) built for
            the recording.
        """
        # TODO: parse key out of URL
        if topic.startswith('/'):
            topic = topic[1:]

        if data is None and json is not None:
            data = json_lib.dumps(json)

        with apiritif.transaction('kafka://[' + self.address + ']/' + topic):
            logging.info("Sending message to Kafka topic %r: %r", topic, data)
            self.producer.produce(
                topic, data, headers=[] if headers is None else headers)
            self.producer.poll(0)
            self.producer.flush()

            # Only the first address of a comma-separated list goes in the URL.
            wrapped_req = self._make_request(
                'PUT', 'kafka://' + self.address.split(',')[0] + '/' + topic,
                data)
            wrapped_response = self._make_response(wrapped_req)
            recorder.record_http_request('PUT', self.address, wrapped_req,
                                         wrapped_response, _context.session)

        return wrapped_response

    def _make_request(self, method, url, request):
        """Build a prepared ``requests`` request and map the payload's id to it."""
        req = requests.Request(method, url=url, data=request)
        prepared = req.prepare()
        _context.grpc_mapping[id(request)] = prepared
        return prepared

    def _make_response(self, wrapped_req):
        """Build a synthetic 202 'Accepted' response bound to *wrapped_req*."""
        resp = requests.Response()
        resp.status_code = 202
        resp.request = wrapped_req
        resp._request = wrapped_req
        resp.msg = 'Accepted'
        resp.raw = io.BytesIO()
        return resp
class TimeOrderedGeneratorWithTimeout(GeneratorInterface):
    """
    A general generator which can read multiple topics and merge their messages in time order.
    A message must be emitted at (arrival_system_time + latency_ms).
    In batch mode (until reaching the first EOP on each stream) the generator will not discard any messages.
    """

    def __init__(
            self,
            broker,
            groupid,
            topics_infos: List[TopicInfo],
            latency_ms,
            commit_interval_sec=None,
            group_by_time=False,
            begin_timestamp=None,
            begin_flag=None,
            end_timestamp=None,
            end_flag=None,
            heartbeat_interval_ms=-1,
            begin_offset=None
    ):
        """
        :param broker: Broker to connect to.
        :param groupid: Group id of the consumer.
        :param topics_infos: [TopicInfo()] - list of TopicInfo objects.
        :param latency_ms: (integer >=0) Latency to wait before serving a message.
                           After this messages with lower or equal timestamps will be discarded.
        :param commit_interval_sec: How many seconds to wait between commits.
                                    None (the default) disables committing.
        :param group_by_time: Group messages with the same timestamp. This will yield a list of messages.
        :param begin_timestamp: Timestamp of the kafka messages where the generator will start.
        :param begin_flag: BEGINNING, CONTINUE, CONTINUE_OR_BEGINNING, LIVE or OFFSET.
                           CONTINUE and CONTINUE_OR_BEGINNING start from the stored offset; the consumer's
                           'auto.offset.reset' is 'earliest' for CONTINUE_OR_BEGINNING and 'latest' otherwise.
                           OFFSET starts from begin_offset.
        :param end_timestamp: Timestamp where to end the reading.
        :param end_flag: NEVER, END_OF_PARTITION
        :param heartbeat_interval_ms: -1 does not produce heartbeat. After every interval will produce a HeartBeat typed
                                      message with the timestamp.
        :param begin_offset: Starting offset position if begin_flag is set to OFFSET
        :raises Exception: If the begin/end arguments are inconsistent (see the checks below).
        :raises KafkaException: If listing topics fails for any reason other than a transport error.
        """
        if begin_timestamp is not None and begin_flag is not None:
            raise Exception('You can not set the begin timestamp and a flag in the same time.')
        if end_timestamp is not None and end_flag is not None:
            raise Exception('You can not set the end timestamp and a flag in the same time.')
        if begin_timestamp is not None and end_timestamp is not None and begin_timestamp >= end_timestamp:
            raise Exception('The begin timestamp is larger then the end timestamp.')
        if begin_flag is not None and end_flag is not None and \
                begin_flag == BeginFlag.LIVE and end_flag == EndFlag.END_OF_PARTITION:
            raise Exception('You can not start in live and process until the end of the streams.')
        if end_flag is not None and not (end_flag == EndFlag.END_OF_PARTITION or end_flag == EndFlag.NEVER):
            raise Exception('Unknown end flag: {} . Please use the given enum to use proper end flag.'.format(end_flag))
        if begin_flag == BeginFlag.OFFSET and begin_offset is None:
            raise Exception('Starting offset position must be configured if BeginFlag is set to OFFSET')
        if begin_offset is not None:
            if begin_flag != BeginFlag.OFFSET:
                raise Exception('Specific offset starting position is set but BeginFlag is not set to OFFSET.')
            elif not isinstance(begin_offset, int):
                raise Exception('Starting offset must be integer, not {}.'.format(type(begin_offset)))
        self.end_ts = end_timestamp
        self.end_flag = end_flag
        self.begin_offset = begin_offset
        self.commit_interval_sec = commit_interval_sec
        self.latency_ms = latency_ms
        self.group_by_time = group_by_time
        self.max_poll_interval_ms = 5 * 60 * 1000
        self.consumer = Consumer(
            {'bootstrap.servers': broker,
             'group.id': groupid,
             'enable.auto.commit': False,
             'auto.offset.reset': 'earliest' if begin_flag == BeginFlag.CONTINUE_OR_BEGINNING else 'latest',
             'fetch.wait.max.ms': 20,
             'max.poll.interval.ms': self.max_poll_interval_ms,
             'enable.partition.eof': True})
        self.last_poll = None
        self.running = True
        # Warning:
        # If you check individual topics, kafka may auto create them if the auto.create.topics.enable is set to True.
        try:
            self.consumer.list_topics(timeout=1)
        except KafkaException as e:
            if e.args[0].name() == "_TRANSPORT":
                logging.error(
                    'Broker "{0}" is not available. Please check if it is running and accessible. \n{1}'.format(broker, e)
                )
                self.running = False
            else:
                # Bare raise keeps the original traceback.
                raise
        self.tps = []
        self.queues = {}
        self.messages_to_be_committed = {}
        self.begin_timestamp = begin_timestamp
        for ti in topics_infos:
            topic_name = ti.topic
            self.messages_to_be_committed[topic_name] = {'last_msg': None, 'committed': True}
            if begin_timestamp is not None:
                self.tps.extend(self.consumer.offsets_for_times(
                    [TopicPartition(topic_name, partition=ti.partition, offset=begin_timestamp)]))
            elif begin_flag is not None:
                if begin_flag == BeginFlag.BEGINNING:
                    self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_BEGINNING))
                elif begin_flag in (BeginFlag.CONTINUE, BeginFlag.CONTINUE_OR_BEGINNING):
                    self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_STORED))
                elif begin_flag == BeginFlag.LIVE:
                    self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_END))
                elif begin_flag == BeginFlag.OFFSET:
                    self.tps.append(TopicPartition(
                        topic_name,
                        partition=ti.partition,
                        offset=OFFSET_BEGINNING if begin_offset <= 0 else begin_offset)
                    )
                else:
                    raise Exception('Unknown begin flag. Please use the enum to provide proper begin flag.')
            else:
                self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_END))
            end_offset = None
            if end_flag is not None and end_flag == EndFlag.END_OF_PARTITION:
                # Query the high watermark of the partition actually being read
                # (previously hard-coded to partition 0).
                end_offset = self.consumer.get_watermark_offsets(
                    TopicPartition(topic_name, ti.partition))[1] - 1
            # A negative end offset means the high watermark was 0; no queue is created for that topic.
            if end_offset is None or end_offset >= 0:
                self.queues[topic_name] = Topic(
                    topic_name,
                    self.consumer,
                    end_offset=end_offset,
                    partition=ti.partition,
                    drop=ti.drop,
                    is_live=(begin_timestamp is None and begin_flag is None) or begin_flag == BeginFlag.LIVE
                )
        self.consumer.assign(self.tps)
        self.last_commit = time.time()

        self.heartbeat_interval_ms = heartbeat_interval_ms
        self.next_hb = None

    def stopGenerator(self):
        """Ask the generator loops to stop at their next ``self.running`` check."""
        self.running = False

    def _serve_messages(self, message_to_serve):
        """Yield *message_to_serve* and commit offsets when the commit interval has elapsed.

        Yields the whole list once when ``group_by_time`` is set, otherwise the
        messages one by one (stopping early if the generator was stopped).
        The last served message per topic is remembered and committed once
        more than ``commit_interval_sec`` seconds passed since the last commit.
        """
        if self.commit_interval_sec is not None and self.group_by_time:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False

        # serve messages
        if self.group_by_time:
            yield message_to_serve
        else:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False
                yield msg
                if not self.running:
                    break

        # commit messages when they were delivered
        current_time = time.time()
        if self.commit_interval_sec is not None and (
                current_time - self.last_commit) > self.commit_interval_sec:
            for k in self.messages_to_be_committed.keys():
                if not self.messages_to_be_committed[k]['committed']:
                    self.consumer.commit(self.messages_to_be_committed[k]['last_msg'])
                    self.messages_to_be_committed[k]['committed'] = True
            self.last_commit = current_time

    def _serve_heartbeat(self, current_timestamp_ms):
        """Yield every pending HeartBeat up to and including *current_timestamp_ms*.

        The first heartbeat is anchored at ``begin_timestamp`` when one was
        given, otherwise at *current_timestamp_ms*.
        """
        if self.next_hb is None:
            if self.begin_timestamp is not None:
                self.next_hb = self.begin_timestamp
            else:
                self.next_hb = current_timestamp_ms
        while self.next_hb <= current_timestamp_ms:
            yield HeartBeat(self.next_hb)
            self.next_hb += self.heartbeat_interval_ms

    def _can_serve(self):
        """Return the event timestamp that may be served now, or None.

        The candidate is the smallest timestamp among the heads of the
        non-empty queues.  It is returned only if every queue reports
        ``can_be_emitted`` for it and at least one head message carrying that
        timestamp has a ``ts`` older than (current system time - latency_ms).
        """
        # -1 is the sentinel for "all queues are empty".
        min_ets = min([q.queue[0].message.timestamp()[1] for q in self.queues.values() if len(q.queue) > 0], default=-1)
        if min_ets == -1:
            return None
        deadline = getSystemTimestamp() - self.latency_ms
        if all([q.can_be_emitted(min_ets) for q in self.queues.values()]) and \
                any([q.queue[0].ts < deadline for q in self.queues.values()
                     if len(q.queue) > 0 and q.queue[0].message.timestamp()[1] == min_ets]):
            return min_ets
        else:
            return None

    def getMessages(self):
        """Generate messages (and HeartBeat objects, if enabled) in timestamp order.

        Polls the consumer, feeds the per-topic queues and serves whatever
        ``_can_serve`` allows.  When every queue is stopped, the remaining
        queued messages are flushed in timestamp order and the generator ends.
        The consumer is closed when the loop exits.
        """
        while self.running:
            if all([v.stopped for v in self.queues.values()]):
                # Every topic is finished: flush what is left, sorted by timestamp.
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.queue)
                message_to_serve = [m.message for m in message_to_serve]
                message_to_serve.sort(key=lambda x: x.timestamp()[1])
                while len(message_to_serve) > 0:
                    ts = message_to_serve[0].timestamp()[1]
                    serve_it = []
                    while len(message_to_serve) > 0 and message_to_serve[0].timestamp()[1] == ts:
                        serve_it.append(message_to_serve.pop(0))
                    if not self.heartbeat_interval_ms == -1:
                        yield from self._serve_heartbeat(ts)
                    yield from self._serve_messages(serve_it)
                logging.info('Exiting from generator.')
                break
            self.last_poll = getSystemTimestamp()
            msg = self.consumer.poll(0.001)
            if msg is not None:
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        if msg.topic() in self.queues:
                            self.queues[msg.topic()].first_eop_reached = True
                            self.queues[msg.topic()].end_of_partition = True
                    else:
                        logging.error('Unhandle error: {}'.format(msg.error()))
                        break
                else:
                    self.queues[msg.topic()].end_of_partition = False
                    if self.end_ts is not None and msg.timestamp()[1] > self.end_ts:
                        self.queues[msg.topic()].stop_topic()
                    else:
                        self.queues[msg.topic()].add_message(msg)
            while self.running:
                event_ts_to_serve = self._can_serve()
                # Leave the serving loop when nothing is ready, or when fewer than
                # 30000 ms remain of max_poll_interval_ms since the last poll.
                if event_ts_to_serve is None or \
                        self.max_poll_interval_ms - (getSystemTimestamp() - self.last_poll) < 30000:
                    if self.end_flag == EndFlag.NEVER and self.heartbeat_interval_ms != -1 \
                            and any([q.end_of_partition for q in self.queues.values()]):
                        if self.next_hb is None:
                            self.next_hb = min(getSystemTimestamp() - self.latency_ms,
                                               min([q.queue[0].message.timestamp()[1] for q in
                                                    self.queues.values() if len(q.queue) > 0], default=sys.maxsize))
                        if self.next_hb < min(getSystemTimestamp() - self.latency_ms,
                                              min([q.queue[0].message.timestamp()[1] for q in
                                                   self.queues.values() if len(q.queue) > 0], default=sys.maxsize)):
                            yield from self._serve_heartbeat(self.next_hb)
                    break
                if self.heartbeat_interval_ms != -1:
                    yield from self._serve_heartbeat(event_ts_to_serve)
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.get_messages(event_ts_to_serve))
                yield from self._serve_messages(message_to_serve)
                if self.end_ts is not None and self.end_ts <= event_ts_to_serve:
                    self.running = False
        self.consumer.close()
] if __name__ == "__main__": parser = argparse.ArgumentParser( description= "create topics if they dont exist using the output of CS:INSTLIST") parser.add_argument("filename") parser.add_argument("--broker", help="the broker to create the topics on") args = parser.parse_args() broker = args.broker conf = {"bootstrap.servers": broker} admin_client = AdminClient(conf) conf["group.id"] = str(uuid.uuid4()) cons = Consumer(conf) topics = cons.list_topics() topics_list = topics.topics with open(args.filename) as file: json = json.load(file) for item in json: inst_name = item["name"] for topic_suffix in TOPICS_PER_INST: topic_to_check = inst_name + topic_suffix if topic_to_check not in topics_list: print(f"creating {topic_to_check}") new_topic = NewTopic(topic_to_check, num_partitions=1) admin_client.create_topics([new_topic]) admin_client.poll(10)