def test_logging_constructor():
    """ Verify different forms of constructors """

    for how in ['dict', 'dict+kwarg', 'kwarg']:
        logger = logging.getLogger('producer: ' + how)
        logger.setLevel(logging.DEBUG)
        f = CountingFilter(logger.name)
        logger.addFilter(f)

        if how == 'dict':
            p = confluent_kafka.Producer({'debug': 'all', 'logger': logger})
        elif how == 'dict+kwarg':
            p = confluent_kafka.Producer({'debug': 'all'}, logger=logger)
        elif how == 'kwarg':
            conf = {'debug': 'all', 'logger': logger}
            p = confluent_kafka.Producer(**conf)
        else:
            raise RuntimeError('Not reached')

        print('Test %s with %s' % (p, how))

        while f.cnt == 0:
            p.poll(timeout=0.5)

        print('%s: %s: %d log messages seen' % (how, f.name, f.cnt))

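# CountingFilter is assumed to be defined elsewhere in the test module. A
# minimal sketch of what it could look like: a logging.Filter that counts
# every record routed through the logger (this is an assumption for
# illustration, not the original implementation).
class CountingFilter(logging.Filter):
    def __init__(self, name):
        self.name = name
        self.cnt = 0

    def filter(self, record):
        # Count the record and let it pass through.
        self.cnt += 1
        return True
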
def test_delete_topic_from_ongoing_tx(self):
    tx_id = "0"
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        for partition in range(topic.partition_count):
            producer.produce(topic.name, '0', '0', partition)

    producer.flush()

    txs_info = self.admin.get_all_transactions()
    assert len(txs_info) == 1, "Should be only one transaction at this time"

    rpk = RpkTool(self.redpanda)
    topic_name = self.topics[0].name
    rpk.delete_topic(topic_name)

    tx = txs_info[0]
    assert tx["transactional_id"] == tx_id, \
        f"Expected transactional_id: {tx_id}, but got {tx['transactional_id']}"

    for partition in tx["partitions"]:
        assert partition["ns"] == "kafka"
        if partition["topic"] == topic_name:
            self.admin.delete_partition_from_transaction(
                tx["transactional_id"], partition["ns"],
                partition["topic"], partition["partition_id"],
                partition["etag"])

    producer.commit_transaction()

    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        if topic.name != topic_name:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

    producer.commit_transaction()

def test_mark_transaction_expired(self):
    producer1 = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': '0',
    })
    producer2 = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': '1',
    })

    producer1.init_transactions()
    producer2.init_transactions()

    producer1.begin_transaction()
    producer2.begin_transaction()

    for topic in self.topics:
        for partition in range(topic.partition_count):
            producer1.produce(topic.name, '0', '0', partition)
            producer2.produce(topic.name, '0', '1', partition)

    producer1.flush()
    producer2.flush()

    # topic and partition deliberately carry the last values from the loop
    # above; every partition has seen both transactions.
    txs_info = self.admin.get_transactions(topic.name, partition, "kafka")
    expected_pids = set(
        map(self.extract_pid, txs_info['active_transactions']))
    assert len(expected_pids) == 2

    abort_tx = list(expected_pids)[0]
    expected_pids.discard(abort_tx)

    for topic in self.topics:
        for partition in range(topic.partition_count):
            self.admin.mark_transaction_expired(topic.name, partition, {
                "id": abort_tx[0],
                "epoch": abort_tx[1]
            }, "kafka")

            txs_info = self.admin.get_transactions(topic.name, partition,
                                                   "kafka")
            assert 'expired_transactions' not in txs_info
            assert len(expected_pids) == len(txs_info['active_transactions'])
            for tx in txs_info['active_transactions']:
                assert self.extract_pid(tx) in expected_pids
                assert tx['status'] == 'ongoing'
                assert tx['timeout_ms'] == 60000

def kafka_producer(options):
    # look for the servers (it is the only config we are interested in)
    servers = [
        elm["value"]
        for elm in options["processing"]["kafka_config"]
        if elm["name"] == "bootstrap.servers"
    ]
    if len(servers) < 1:
        raise ValueError(
            "Bad kafka_config, could not find 'bootstrap.servers'.\n"
            "The configuration should have an entry of the format \n"
            "{name:'bootstrap.servers', value:'127.0.0.1'} at path 'processing.kafka_config'"
        )

    servers = servers[0]

    settings = {
        "bootstrap.servers": servers,
        "group.id": "test-consumer-%s" % uuid.uuid4().hex,
        "enable.auto.commit": True,
        "auto.offset.reset": "earliest",
    }

    producer = kafka.Producer(settings)
    return producer

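# The options structure kafka_producer expects, reconstructed from the error
# message above (server address is a placeholder):
#
# options = {
#     "processing": {
#         "kafka_config": [
#             {"name": "bootstrap.servers", "value": "127.0.0.1:9092"},
#         ]
#     }
# }
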
def createProducer(self, kafkaSync):
    self.config["kafkaSync"] = kafkaSync
    config = {'bootstrap.servers': self.config["kafkaHost"],
              'client.id': socket.gethostname(),
              'default.topic.config': {'acks': 'all'}}
    self.producer = confluent_kafka.Producer(config)

def confluent_kafka_producer_performance(data, nums=5000):
    topic = 'yrw_event_log'
    conf = {'bootstrap.servers': '192.168.0.164:9092'}
    producer = confluent_kafka.Producer(**conf)

    # Collect the actual payloads that failed, so the retry pass re-sends
    # them rather than repeating only the last message.
    messages_to_retry = []
    producer_start = time.time()

    j = 0
    for content in data.run(nums):
        try:
            producer.produce(topic, value=content.encode('utf-8'))
            j = j + 1
            if j % 100000 == 0:
                producer.flush()
        except BufferError:
            messages_to_retry.append(content)

    # hacky retry for messages that over-filled the local buffer
    for content in messages_to_retry:
        producer.poll(0)
        try:
            producer.produce(topic, value=content.encode('utf-8'))
        except BufferError:
            producer.poll(0)
            producer.produce(topic, value=content.encode('utf-8'))

    producer.flush()

    print('confluent_kafka produce {0} rows cost {1} s'.format(
        nums, time.time() - producer_start))

def setUp(self):
    data_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data'))
    schema_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__),
                     '../schemas/distribution_schema_0p2.avsc'))

    r = AlertReader(data_path)
    alerts = r.to_list()

    conf = load_credentials(tmp=True)
    kafka_servers = conf['servers']

    p = confluent_kafka.Producer({'bootstrap.servers': kafka_servers})

    for alert in alerts:
        avro_data = encode_into_avro(alert, schema_path)
        topic = get_legal_topic_name(alert['cdsxmatch'])
        p.produce(topic, avro_data)
    p.flush()

    # instantiate an AlertConsumer
    mytopics = conf["mytopics"]
    myconfig = {
        'bootstrap.servers': kafka_servers,
        'group_id': conf['group_id']
    }

    self.consumer = AlertConsumer(mytopics, myconfig, schema_path=schema_path)

def fly_drones(bootstrap_servers,
               nmessages,
               producer_dict_kwargs=None,
               topic_name="drones_raw",
               time_delay=0,
               drones=None):
    """
    A simple example of flying drones that send JSON messages to the
    message broker.

    Args:
        bootstrap_servers (str): Comma separated string of Kafka servers
        nmessages (int): Number of messages to send
        producer_dict_kwargs (dict): Optional keyword arguments for producer
        topic_name (str): Topic name to which drone messages will be sent
        time_delay (int): Delay time between cycles when producing messages
        drones (iterable): Iterable of drones from which to generate messages
    """
    pdk = {'bootstrap.servers': bootstrap_servers}
    if isinstance(producer_dict_kwargs, dict):
        pdk.update(producer_dict_kwargs)
    producer = ck.Producer(pdk)

    z = len(str(nmessages))  # Pretty print cycle number for logging
    for i in range(nmessages):
        print("====MESSAGE SET {}====".format(str(i).zfill(z)))
        for drone in drones:
            msg = drone.message()
            print(msg)
            producer.produce(topic_name, msg)
        time.sleep(time_delay)
    producer.flush()

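# A minimal usage sketch for fly_drones. StubDrone and the broker address are
# assumptions for illustration; any object exposing a message() method that
# returns a JSON string would work as a drone.
import json


class StubDrone:
    def __init__(self, drone_id):
        self.drone_id = drone_id

    def message(self):
        # Emit a tiny JSON payload identifying the drone.
        return json.dumps({"drone_id": self.drone_id, "status": "flying"})


# Requires a reachable broker:
# fly_drones("localhost:9092", nmessages=3,
#            drones=[StubDrone(1), StubDrone(2)], time_delay=1)
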
def verify_producer():
    """ Verify basic Producer functionality """

    # Producer config
    conf = {'bootstrap.servers': bootstrap_servers,
            'error_cb': error_cb,
            'api.version.request': api_version_request}

    # Create producer
    p = confluent_kafka.Producer(**conf)
    print('producer at %s' % p)

    headers = produce_headers

    # Produce some messages
    p.produce(topic, 'Hello Python!', headers=headers)
    p.produce(topic, key='Just a key and headers', headers=headers)
    p.produce(topic, key='Just a key')
    p.produce(topic, partition=1, value='Strictly for partition 1',
              key='mykey', headers=headers)

    # Produce more messages, now with delivery report callbacks in various forms.
    mydr = MyTestDr()
    p.produce(topic, value='This one has a dr callback',
              callback=mydr.delivery)
    p.produce(topic, value='This one has a lambda',
              callback=lambda err, msg: MyTestDr._delivery(err, msg))
    p.produce(topic, value='This one has neither')

    # Try producing with a timestamp
    try:
        p.produce(topic, value='with a timestamp', timestamp=123456789000)
    except NotImplementedError:
        if confluent_kafka.libversion()[1] >= 0x00090400:
            raise

    # Produce even more messages
    for i in range(0, 10):
        p.produce(topic, value='Message #%d' % i, key=str(i),
                  callback=mydr.delivery)
        p.poll(0)

    print('Waiting for %d messages to be delivered' % len(p))

    # Block until all messages are delivered/failed
    p.flush()

    #
    # Additional isolated tests
    #
    test_producer_dr_only_error()

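# produce_headers is assumed to be defined at module level. Headers are passed
# to produce() as a list of (name, value) tuples, where values may be str,
# bytes or None; a plausible definition for the test above:
produce_headers = [('foo1', 'bar'),
                   ('foo1', 'bar2'),
                   ('foo2', b'1')]
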
def setUp(self):
    data_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__), 'data'))
    schema_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__),
        '../schemas/distribution_schema_0p2.avsc'))

    r = AlertReader(data_path)
    alerts = r.to_list()

    kafka_servers = 'localhost:9093, localhost:9094, localhost:9095'
    p = confluent_kafka.Producer({
        'bootstrap.servers': kafka_servers})

    for alert in alerts:
        avro_data = encode_into_avro(alert, schema_path)
        topic = get_legal_topic_name(alert['cdsxmatch'])
        p.produce(topic, avro_data)
    p.flush()

    # instantiate an AlertConsumer
    mytopics = ["rrlyr"]
    myconfig = {
        'bootstrap.servers': kafka_servers,
        'group_id': 'test_group'}

    self.consumer = AlertConsumer(mytopics, myconfig, schema=schema_path)

def send(topicname, records, schema):
    """ Send an avro "packet" to a particular topic at IPAC

    Parameters
    ----------
    topicname: name of the topic, e.g. ztf_20191221_programid2_zuds
    records: a list of dictionaries
    schema: schema definition
    """
    # Parse the schema file
    # schema_definition = fastavro.schema.load_schema(schemafile)

    # Write into an in-memory "file"
    import fastavro
    import confluent_kafka
    from io import BytesIO

    out = BytesIO()
    fastavro.writer(out, schema, records)
    out.seek(0)  # go back to the beginning

    # Connect to the IPAC Kafka brokers
    producer = confluent_kafka.Producer({
        'bootstrap.servers': 'ztfalerts04.ipac.caltech.edu:9092,ztfalerts05.ipac.caltech.edu:9092,ztfalerts06.ipac.caltech.edu:9092'
    })

    # Send an avro alert
    producer.produce(topic=topicname, value=out.read())
    producer.flush()

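# A minimal usage sketch for send(). The schema file name and record contents
# are placeholders; fastavro.schema.load_schema is the standard way to parse
# an .avsc file into the schema object send() expects.
#
# import fastavro
# schema = fastavro.schema.load_schema('alert_schema.avsc')
# send('ztf_20191221_programid2_zuds', [{'objectId': 'ZTF19abcdefg'}], schema)
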
async def on_start(self) -> None:
    self._producer = confluent_kafka.Producer({
        'bootstrap.servers': server_list(
            self.transport.url, self.transport.default_port),
        'client.id': self.app.conf.broker_client_id,
        'max.in.flight.requests.per.connection': 1,
    })

def confluent_kafka_producer_performance(topic=topic, msg_count=1):
    conf = {'bootstrap.servers': bootstrap_servers}
    producer = confluent_kafka.Producer(**conf)
    print("\n>>> Connecting to Kafka at {} via confluent-kafka-python "
          "as producer".format(bootstrap_servers))
    messages_to_retry = 0

    producer_start = time.time()
    for i in range(msg_count):
        try:
            producer.produce(topic, value=msg_payload)
        except BufferError:
            messages_to_retry += 1

    # hacky retry messages that over filled the local buffer
    for i in range(messages_to_retry):
        producer.poll(0)
        try:
            producer.produce(topic, value=msg_payload)
        except BufferError:
            producer.poll(0)
            producer.produce(topic, value=msg_payload)

    producer.flush()

    return time.time() - producer_start

def test_topic_config_update():
    # *NOTE* default.topic.config has been deprecated.
    # This example remains to ensure backward-compatibility until its removal.
    confs = [{"message.timeout.ms": 600000,
              "default.topic.config": {"message.timeout.ms": 1000}},
             {"message.timeout.ms": 1000},
             {"default.topic.config": {"message.timeout.ms": 1000}}]

    def on_delivery(err, msg):
        # Since there is no broker, produced messages should time out.
        # seen_delivery_cb is assumed to be initialised to False at module level.
        global seen_delivery_cb
        seen_delivery_cb = True
        assert err.code() == confluent_kafka.KafkaError._MSG_TIMED_OUT

    for conf in confs:
        p = confluent_kafka.Producer(conf)

        start = time.time()
        timeout = start + 10.0

        p.produce('mytopic', value='somedata', key='a key',
                  on_delivery=on_delivery)

        while time.time() < timeout:
            if seen_delivery_cb:
                return
            p.poll(1.0)

        if "CI" in os.environ:
            pytest.xfail("Timeout exceeded")
        pytest.fail("Timeout exceeded")

def throttle_cb_instantiate():
    """ Ensure we can configure a proper callback """

    def throttle_cb(throttle_event):
        pass

    confluent_kafka.Producer({'throttle_cb': throttle_cb})

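# A sketch of a more informative throttle callback. The ThrottleEvent passed
# in exposes broker_name, broker_id and throttle_time (in seconds) per the
# confluent_kafka documentation.
def verbose_throttle_cb(throttle_event):
    print('Throttled %.3fs by %s (id %d)' % (throttle_event.throttle_time,
                                             throttle_event.broker_name,
                                             throttle_event.broker_id))
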
def confluent_kafka_producer_performance():
    topic = 'test'
    conf = {'bootstrap.servers': '172.20.10.10:9092'}
    producer = confluent_kafka.Producer(**conf)
    print(producer)
    messages_to_retry = 0
    msg_payload = 'This is message'

    producer_start = time.time()
    for i in range(10):
        try:
            producer.produce(topic, value=msg_payload)
            print(msg_payload)
        except BufferError:
            messages_to_retry += 1

    # hacky retry messages that over filled the local buffer
    for i in range(messages_to_retry):
        producer.poll(0)
        try:
            producer.produce(topic, value=msg_payload)
        except BufferError:
            producer.poll(0)
            producer.produce(topic, value=msg_payload)

    # producer.flush()

    return time.time() - producer_start

def init():
    global log
    global kafka_producer

    if not log:
        log = create_logger(Config().get("logging"))

    if kafka_producer:
        raise Exception("XOSKafkaProducer already initialized")
    else:
        log.info("Connecting to Kafka with bootstrap servers: %s" %
                 Config.get("kafka_bootstrap_servers"))
        try:
            producer_config = {
                "bootstrap.servers": ",".join(
                    Config.get("kafka_bootstrap_servers"))
            }
            kafka_producer = confluent_kafka.Producer(**producer_config)
            log.info("Connected to Kafka: %s" % kafka_producer)
        except confluent_kafka.KafkaError as e:
            log.exception("Kafka Error: %s" % e)

def setUp(self):
    data_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data'))
    alert_reader = read_avro_alerts(data_path)

    kafka_servers = 'localhost:9093, localhost:9094, localhost:9095'
    p = confluent_kafka.Producer({'bootstrap.servers': kafka_servers})

    for alert in alert_reader:
        avro_data = encode_into_avro(alert)
        topic = get_legal_topic_name(alert['cross_match_alerts_per_batch'])
        p.produce(topic, avro_data)
    p.flush()

    # instantiate an AlertConsumer
    mytopics = ["rrlyr", "ebwuma", "unknown"]
    test_schema = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "test_schema.avsc"))
    myconfig = {
        'bootstrap.servers': kafka_servers,
        'group_id': 'test_group'
    }

    self.consumer = AlertConsumer(mytopics, myconfig, schema=test_schema)

def __init__(self, logger, config_filepath, client_id):
    self.logger = logger
    self.config_filepath = config_filepath
    self.client_id = client_id
    self.fatal_errors = [confluent_kafka.KafkaError._ALL_BROKERS_DOWN]

    # Config init
    self._init_config()

    # Initialize Avro serializers
    self._init_serializers(True)

    # Initialize producer
    self.producer = confluent_kafka.Producer(
        **self.prod_config.confluent_kafka_config)

    # Add callback wrappers around producer to Tornado IOLoop
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.add_callback(self.poll)

    # Add schema update to Tornado Periodic Callback
    self.update_interval = 60 * 60 * 1000
    tornado_callback = tornado.ioloop.PeriodicCallback(
        self.load_data, self.update_interval, io_loop=io_loop)
    tornado_callback.start()

    self.logger.log('INFO', 'Successfully Initialized Kafka Producer')

def test_delete_non_existent_topic(self):
    tx_id = "0"
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    error_topic_name = "error_topic"
    for topic in self.topics:
        assert error_topic_name != topic.name
        for partition in range(topic.partition_count):
            producer.produce(topic.name, '0', '0', partition)

    producer.flush()

    try:
        self.admin.delete_partition_from_transaction(
            tx_id, "kafka", error_topic_name, 0, 0)
    except requests.exceptions.HTTPError as e:
        assert e.response.text == '{"message": "Can not find partition({kafka/error_topic/0}) in transaction for delete", "code": 400}'

    producer.commit_transaction()

def get_kafka_producer():
    producer = None
    try:
        producer = confluent_kafka.Producer(**get_config())
    except Exception as e:
        print('Could not create kafka producer', e, file=sys.stderr)
    return producer

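# get_config is assumed to return a librdkafka-style configuration dict; a
# hypothetical minimal version for illustration:
def get_config():
    return {'bootstrap.servers': 'localhost:9092'}
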
def test_delete_non_existent_tid(self):
    tx_id = "0"
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        for partition in range(topic.partition_count):
            producer.produce(topic.name, '0', '0', partition)

    producer.flush()

    txs_info = self.admin.get_all_transactions()
    tx = txs_info[0]
    topic_name = self.topics[0].name
    error_tx_id = "1"

    for partition in tx["partitions"]:
        if partition["topic"] == topic_name:
            try:
                self.admin.delete_partition_from_transaction(
                    error_tx_id, partition["ns"], partition["topic"],
                    partition["partition_id"], partition["etag"])
            except requests.exceptions.HTTPError as e:
                assert e.response.text == '{"message": "Unexpected tx_error error: Unknown server error", "code": 500}'

    producer.commit_transaction()

def test_delete_non_existent_etag(self):
    tx_id = "0"
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        for partition in range(topic.partition_count):
            producer.produce(topic.name, '0', '0', partition)

    producer.flush()

    txs_info = self.admin.get_all_transactions()
    tx = txs_info[0]
    topic_name = self.topics[0].name

    for partition in tx["partitions"]:
        if partition["topic"] == topic_name:
            try:
                self.admin.delete_partition_from_transaction(
                    tx_id, partition["ns"], partition["topic"],
                    partition["partition_id"], partition["etag"] + 100)
            except requests.exceptions.HTTPError as e:
                assert e.response.text == '{{"message": "Can not find partition({{{}/{}/{}}}) in transaction for delete", "code": 400}}'.format(
                    partition["ns"], partition["topic"],
                    partition["partition_id"])

    producer.commit_transaction()

def kafka_sink(producer_conf, output_topic, parsed_df):
    producer = ck.Producer(producer_conf)
    json_str = parsed_df.to_json(orient="records", lines=True)
    json_recs = json_str.split("\n")
    for json_rec in json_recs:
        producer.produce(output_topic, json_rec)
    producer.flush()

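# A minimal usage sketch for kafka_sink, assuming parsed_df is a pandas-style
# DataFrame (broker address and topic name are placeholders):
#
# import pandas as pd
# df = pd.DataFrame([{"id": 1, "msg": "hello"}, {"id": 2, "msg": "world"}])
# kafka_sink({"bootstrap.servers": "localhost:9092"}, "parsed-output", df)
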
def __init__(self, config, topic, start_shutdown):
    self.loop = asyncio.get_event_loop()
    self.producer = confluent_kafka.Producer(config)
    self.topic = topic
    self.start_shutdown = start_shutdown
    # Schedule the polling coroutine; note that the task attribute shadows
    # the polling_task method on this instance once scheduled.
    self.polling_task = asyncio.create_task(self.polling_task())

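# A plausible shape for the polling_task coroutine assumed above (a sketch,
# not the original implementation, assuming start_shutdown is an
# asyncio.Event): serve delivery callbacks with non-blocking poll(0) so the
# event loop is never blocked, until shutdown is signalled.
async def polling_task(self):
    while not self.start_shutdown.is_set():
        self.producer.poll(0)
        await asyncio.sleep(0.1)
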
def __init__(self,
             serializer: Serializable,
             kafka_topic: str,
             bootstrap_server: str = "localhost",
             bootstrap_port: int = 9092,
             emit_buffer_batch_size: int = 10,
             emit_buffer_timeout_ms: int = 100,
             *args, **kwargs):
    # For java, need to ensure it is a bytesSerializer
    """Connects to a specified kafka topic and sets up the emitter.

    Args:
        serializer (Serializable): Serializer to convert a message to bytes
            before sending to kafka.
        kafka_topic (str): Name of kafka topic to publish to.
        bootstrap_server (str, optional): Address of the Kafka bootstrap
            server. Defaults to "localhost".
        bootstrap_port (int, optional): Bootstrap server port on which the
            topic is listening for messages. Defaults to 9092.
    """
    self.kafka_topic = kafka_topic
    conf = {
        "bootstrap.servers": bootstrap_server + ":" + str(bootstrap_port),
        "client.id": socket.gethostname()
    }
    self.create_topic(topic_name=kafka_topic, conf=conf)
    self.producer = confluent_kafka.Producer(conf)
    super().__init__(serializer=serializer,
                     emit_buffer_batch_size=emit_buffer_batch_size,
                     emit_buffer_timeout_ms=emit_buffer_timeout_ms,
                     *args, **kwargs)

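# create_topic is assumed to be defined on the class; a sketch of what it
# could look like using confluent_kafka's AdminClient (the partition and
# replication counts are placeholder choices):
def create_topic(self, topic_name, conf):
    from confluent_kafka.admin import AdminClient, NewTopic

    admin = AdminClient({"bootstrap.servers": conf["bootstrap.servers"]})
    futures = admin.create_topics(
        [NewTopic(topic_name, num_partitions=1, replication_factor=1)])
    for topic, future in futures.items():
        try:
            future.result()  # block until creation finishes
        except Exception as e:
            # The topic may already exist; treat that as non-fatal here.
            print("Topic {} not created: {}".format(topic, e))
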
def main():
    #
    # Parse arguments
    #
    args = get_args()

    #
    # Create producer class
    #
    producer_config = create_producer_config(args)
    print("Producer configuration: ")
    print(yaml.dump(producer_config, default_flow_style=False))
    producer = confluent_kafka.Producer(**producer_config)

    #
    # Produce 10 messages. Note that as of version 1.4.1, using this with
    # Python 3.8 will give a deprecation message
    # (https://github.com/confluentinc/confluent-kafka-python/issues/763)
    #
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    for i in range(args.messages):
        msg = {"msg_count": i}
        print("Producing message %d" % i)
        try:
            producer.produce(TOPIC, serialize(msg), callback=on_delivery)
        except BaseException:
            print("Received exception")
            raise
        producer.poll(0)

    print("Flushing")
    remaining = producer.flush(5)
    if remaining > 0:
        print("There are still %d messages in the queue" % remaining)

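# serialize and on_delivery are assumed to be defined elsewhere in the
# script; hypothetical minimal versions for illustration:
import json


def serialize(msg):
    # Encode the message dict as UTF-8 JSON bytes.
    return json.dumps(msg).encode('utf-8')


def on_delivery(err, msg):
    # Delivery report callback: err is None on success.
    if err is not None:
        print("Delivery failed for %s: %s" % (msg.key(), err))
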
def test_conf_none():
    """ Issue #133
    Test that None can be passed for NULL by setting bootstrap.servers
    to None. If None would be converted to a string then a broker would
    show up in statistics. Verify that it doesn't. """

    def stats_cb_check_no_brokers(stats_json_str):
        """ Make sure no brokers are reported in stats """
        global seen_stats_cb_check_no_brokers
        stats = json.loads(stats_json_str)
        assert len(stats['brokers']) == 0, \
            "expected no brokers in stats: %s" % stats_json_str
        seen_stats_cb_check_no_brokers = True

    conf = {'bootstrap.servers': 'something',
            'bootstrap.servers': None,  # overwrites previous value
            'statistics.interval.ms': 10,
            'stats_cb': stats_cb_check_no_brokers}

    p = confluent_kafka.Producer(conf)
    p.poll(timeout=1)

    global seen_stats_cb_check_no_brokers
    assert seen_stats_cb_check_no_brokers

def __init__(self, cluster, sched, topic, rate):
    super(RateProducer, self).__init__()
    conf = cluster.client_config()
    conf.update({
        'enable.idempotence': True,
        'linger.ms': 50,
        # 'debug': 'eos,msg,protocol,broker,topic'
    })
    self.run = True
    self.p = confluent_kafka.Producer(conf)
    self.topic = topic
    self.cluster = cluster
    self.msgid = 0
    self.cnt = {'dr_err': 0, 'dr_ok': 0, 'err': 0}
    self.partcnt = defaultdict(dict)
    self.logger = logging.getLogger('RateProducer')
    self.logger.setLevel(logging.DEBUG)
    self.logger.addHandler(loghandler)
    self.sched = sched
    self.jobs = []
    self.jobs.append(
        sched.add_job(self.send_msg, 'interval', seconds=1.0 / rate))
    self.jobs.append(sched.add_job(self.poll))

def __init__(
    self,
    hosts: List[str],
    max_flush_time_on_full_buffer: float = 5.0,
    statistics_interval_ms: int = 5000,
    use_confluent_monitoring_interceptor: bool = False,
    logger: Optional[logging.Logger] = None,
    debug: bool = False,
):
    conf = {
        "bootstrap.servers": ",".join(hosts),
        "statistics.interval.ms": statistics_interval_ms,
        "error_cb": self.error_callback,
        "stats_cb": self.stats_callback,
        "throttle_cb": self.throttle_callback,
        "on_delivery": self.delivery_report_callback,
    }
    if use_confluent_monitoring_interceptor:
        conf["plugin.library.paths"] = "monitoring-interceptor"
    if debug:
        conf["debug"] = "topic,broker"

    self.max_flush_time_on_full_buffer = max_flush_time_on_full_buffer
    self.logger = logger or logging.getLogger("KafkaProducer")
    self._kafka_instance = confluent_kafka.Producer(conf, logger=self.logger)
    self._async_poll = async_wrap(self._kafka_instance.poll)
    self._async_flush = async_wrap(self._kafka_instance.flush)

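# async_wrap is assumed to be a helper defined elsewhere; a plausible sketch
# that offloads a blocking callable to the default executor so it can be
# awaited without blocking the event loop:
import asyncio
import functools


def async_wrap(func):
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, functools.partial(func, *args, **kwargs))
    return wrapper
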