async def produce_for_topic(*, loop, producer_settings, topic_name, schema,
                            schema_id, period):
    logger = structlog.get_logger().bind(topic=topic_name)

    # Preparse schema
    schema = fastavro.parse_schema(schema)
    logger.info('Preparsed schema')

    # Start up the producer
    producer = aiokafka.AIOKafkaProducer(loop=loop, **producer_settings)
    await producer.start()
    logger.info('Started producer')

    # Generate and write messages
    try:
        for message in generate_message(schema):
            logger.debug('New message', message=message)

            message_fh = BytesIO()
            message_fh.write(struct.pack('>bI', MAGIC_BYTE, schema_id))
            fastavro.schemaless_writer(message_fh, schema, message)
            message_fh.seek(0)

            await producer.send_and_wait(topic_name, value=message_fh.read())
            # increment the Prometheus production counter
            PRODUCED.inc()
            logger.debug('Sent message')

            # naive message period; need to correct for production time
            await asyncio.sleep(period)
    finally:
        await producer.stop()
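# Minimal consumer-side sketch of how the framing written above could be read
# back: struct.pack('>bI', MAGIC_BYTE, schema_id) prepends one magic byte and a
# 4-byte big-endian schema id before the Avro body (the Confluent wire format).
# The function name decode_message and the assumption that the same parsed
# fastavro schema is at hand are illustrative, not from the original snippet.
import struct
from io import BytesIO

import fastavro


def decode_message(raw: bytes, schema):
    buf = BytesIO(raw)
    magic_byte, schema_id = struct.unpack('>bI', buf.read(5))
    record = fastavro.schemaless_reader(buf, schema)
    return schema_id, record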
async def startup(self, state: State) -> State:
    self.producer = aiokafka.AIOKafkaProducer(
        bootstrap_servers=state.settings.kafka_servers)
    await self.producer.start()
    state.producer = self.producer
    return state
async def create_producer(self, **kwargs):
    producer = aiokafka.AIOKafkaProducer(
        loop=self.Loop,
        bootstrap_servers=self.get_bootstrap_servers(),
        compression_type=self.get_compression(),
        **kwargs)
    return producer
async def kafka_producer_factory(config):
    if config["ssl_context"]:
        config = dict(config, ssl_context=create_ssl_context(**config["ssl_context"]))
    producer = aiokafka.AIOKafkaProducer(**config)
    await producer.start()
    return producer
async def main(source_topic, sink_topic):
    consumer = aiokafka.AIOKafkaConsumer(
        group_id="events_cons_explain",
        enable_auto_commit=False,
        auto_offset_reset="earliest",
    )
    await consumer.start()
    consumer.subscribe([source_topic])

    producer = aiokafka.AIOKafkaProducer()
    await producer.start()

    mem = defaultdict(list)

    async def handle_msg(msg):
        val = mem[msg.key.decode()]
        new_val = int(msg.value.decode())
        if (
            new_val not in val
        ):  # make idempotent; see the time gap between setting this and the consumer.commit
            val.append(new_val)
        print(f"Out value {msg.key.decode()} -> {shortlist(val)}")
        await producer.send(sink_topic, value=json.dumps(val).encode(), key=msg.key)

    try:
        async for msg in consumer:
            print(f"In value {msg.key.decode()} -> {msg.value.decode()}")
            await handle_msg(msg)
            await consumer.commit()
    finally:
        await consumer.stop()
        await producer.stop()
async def _get_producer(self) -> aiokafka.AIOKafkaProducer:
    if self._producer is None:
        self._producer = aiokafka.AIOKafkaProducer(
            bootstrap_servers=cast(List[str], self._kafka_servers),
            loop=asyncio.get_event_loop(),
            api_version=self._kafka_api_version,
        )
        await self._producer.start()
    return self._producer
async def _connection(self):
    producer = aiokafka.AIOKafkaProducer(
        loop=self.Loop,
        bootstrap_servers=self.get_bootstrap_servers())
    try:
        await producer.start()
        await self._loader(producer=producer)
    except BaseException as e:
        L.exception("Unexpected Kafka Error.")
    finally:
        await producer.stop()
async def create_producer(self, **kwargs):
    producer = aiokafka.AIOKafkaProducer(
        loop=self.Loop,
        bootstrap_servers=self.get_bootstrap_servers(),
        compression_type=self.get_compression(),
        security_protocol=self.Config.get('security_protocol'),
        sasl_mechanism=self.Config.get('sasl_mechanism'),
        sasl_plain_username=self.Config.get('sasl_plain_username') or None,
        sasl_plain_password=self.Config.get('sasl_plain_password') or None,
        **kwargs)
    return producer
async def _get_producer(self, loop):
    """Inits kafka producer."""
    kwargs = {'loop': loop, 'bootstrap_servers': self.config.kafka.hosts}
    ssl_context = self.config.kafka.ssl_context()
    if ssl_context:
        kwargs['ssl_context'] = ssl_context
        kwargs['security_protocol'] = 'SSL'
    producer = aiokafka.AIOKafkaProducer(**kwargs)
    await loop.create_task(producer.start())
    return producer
def producer_factory(self) -> aiokafka.AIOKafkaProducer:
    return aiokafka.AIOKafkaProducer(
        bootstrap_servers=cast(List[str], self._kafka_servers),
        loop=asyncio.get_event_loop(),
        api_version=self._kafka_api_version,
        **{
            k: v
            for k, v in self.kafka_settings.items()
            if k in _aiokafka_producer_settings
        },
    )
async def send(self, name: str, message: str, **kwargs) -> None:
    await self.producer.start()
    try:
        await self.producer.send_and_wait(name, bytes(message, 'utf8'), **kwargs)
    finally:
        await self.producer.stop()
        # A stopped producer cannot be restarted, so recreate it for the next send.
        _loop = asyncio.get_running_loop()
        self.producer = aiokafka.AIOKafkaProducer(
            loop=_loop, bootstrap_servers=self.addr)
def __init__(self, group_id_id, memupdate_topicname="mem-updater"):
    # We should check here that the topic exists and has the right number of
    # partitions; maybe even check that a written item goes to the right partition.
    self.group_id_id = group_id_id
    self._mem: Dict[str, List[int]] = defaultdict(list)
    self.memupdate_topicname = memupdate_topicname
    self.producer = aiokafka.AIOKafkaProducer()
    # Start with an empty snapshot so that later code can assume there is
    # always a previous one.
    self.snapshots = [{}]
    self.started = False
    self._current_key = None
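# Hedged sketch of the topic check that the comment in __init__ above alludes to.
# It is not part of the original class; the helper name and expected_partitions
# parameter are illustrative. It relies on AIOKafkaProducer.partitions_for(),
# which returns the set of partition ids the cluster reports for a topic (the
# producer must already be started).
async def check_memupdate_topic(producer, topic_name, expected_partitions):
    partitions = await producer.partitions_for(topic_name)
    if partitions is None or len(partitions) != expected_partitions:
        raise RuntimeError(
            f"Topic {topic_name!r} reports partitions {partitions}, "
            f"expected {expected_partitions}")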
async def main():
    loop = asyncio.get_event_loop()
    producer = aiokafka.AIOKafkaProducer(
        loop=loop, bootstrap_servers='128.0.255.10:9092')
    await producer.start()
    try:
        result = await producer.send_and_wait(
            "my_topic", key=b"test", value=b"hello,world!")
        print(result)
        print(result.offset)
    finally:
        await producer.stop()
async def main(config):
    """Record website availability results to kafka"""
    producer = aiokafka.AIOKafkaProducer(
        value_serializer=json_serialize, **config.KAFKA_OPTS)
    await producer.start()
    try:
        while True:
            for url, pattern in config.URLS:
                asyncio.create_task(check_url(config, producer, url, pattern))
            await asyncio.sleep(config.CHECK_FREQUENCY)
    finally:
        await producer.stop()
async def produce_for_simple_topic(*, loop, httpsession, producer_settings,
                                   schema_registry_url, topic_name, period):
    logger = structlog.get_logger(__name__).bind(
        role='producer',
        topic=topic_name,
    )

    logger.info('Getting schemas')
    schema_uri = URITemplate(
        schema_registry_url + '/subjects{/subject}/versions/latest')
    headers = {'Accept': 'application/vnd.schemaregistry.v1+json'}

    # Get key schema
    r = await httpsession.get(
        schema_uri.expand(subject=topic_name + '-key'), headers=headers)
    data = await r.json()
    key_schema = fastavro.parse_schema(json.loads(data['schema']))

    # Get value schema
    r = await httpsession.get(
        schema_uri.expand(subject=topic_name + '-value'), headers=headers)
    data = await r.json()
    value_schema = fastavro.parse_schema(json.loads(data['schema']))

    default_key_fh = BytesIO()
    fastavro.schemaless_writer(default_key_fh, key_schema, {})
    default_key_fh.seek(0)
    default_key = default_key_fh.read()

    # Set up producer
    producer = aiokafka.AIOKafkaProducer(loop=loop, **producer_settings)
    await producer.start()
    logger.info('Started producer')

    try:
        while True:
            message_fh = BytesIO()
            fastavro.schemaless_writer(
                message_fh,
                value_schema,
                {'timestamp': datetime.datetime.now(datetime.timezone.utc)})
            message_fh.seek(0)

            # May want to adjust this to control batching latency
            await producer.send_and_wait(
                topic_name, key=default_key, value=message_fh.read())
            PRODUCED.inc()  # increment the Prometheus production counter
            logger.debug('Sent message')

            # naive message period; need to correct for production time
            await asyncio.sleep(period)
    finally:
        await producer.stop()
def on_init(self) -> None:
    transport = cast(Transport, self.transport)
    self._producer = aiokafka.AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=server_list(transport.url, transport.default_port),
        client_id=transport.app.conf.broker_client_id,
        acks=self.acks,
        linger_ms=self.linger_ms,
        max_batch_size=self.max_batch_size,
        max_request_size=self.max_request_size,
        compression_type=self.compression_type,
        on_irrecoverable_error=self._on_irrecoverable_error,
    )
async def kafka_gateway(request_chan: ac.Chan) -> None:
    loop = asyncio.get_running_loop()
    producer = aiokafka.AIOKafkaProducer(loop=loop, bootstrap_servers='kafka:9092')
    await producer.start()

    response_chan = ac.Chan()
    consumer_task = ac.go(consume(response_chan))

    pending_responses = {}
    try:
        key_seq = 1
        while True:
            result, chan = await ac.select(request_chan, response_chan)
            if result is None:
                break

            key: str
            msg: str
            resp_chan: ac.Chan
            if chan is request_chan:
                msg, resp_chan = result
                key = f'msg{key_seq}'
                key_seq += 1
                logging.info(f"Requesting salutation {key}, {msg}")
                # No value serializer is configured, so encode the value as bytes.
                await producer.send_and_wait(
                    "salutation-requests",
                    key=key.encode('utf8'),
                    value=msg.encode('utf8'))
                logging.info("Message sent")
                await producer.flush()
                pending_responses[key] = resp_chan
            elif chan is response_chan:
                key_bytes, msg_bytes = result
                key, msg = key_bytes.decode('utf8'), msg_bytes.decode('utf8')
                if key in pending_responses:
                    resp_chan = pending_responses[key]
                    del pending_responses[key]
                    await resp_chan.put(msg)
                else:
                    logging.error(f"Message key '{key}' not awaiting response")
    except asyncio.CancelledError:
        logging.info("Gateway cancelled")
    finally:
        consumer_task.cancel()
        await asyncio.gather(consumer_task)
        logging.info("Stopping producer")
        await producer.stop()
def __init__(self, config: Config, logger: logging.Logger,
             event_loop: asyncio.AbstractEventLoop, queue: asyncio.Queue):
    super().__init__(config, logger, event_loop, queue)
    context = create_ssl_context(
        cafile=self.config.kafka.cafile,
        certfile=self.config.kafka.cert,
        keyfile=self.config.kafka.key,
        password=self.config.kafka.passwd,
    )
    self.producer = aiokafka.AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=self.config.kafka.servers,
        security_protocol="SSL",
        ssl_context=context,
    )
def __init__(self, loop: AbstractEventLoop, topic, bootstrap_servers,
             frequency_ms=1000 / 60):
    self.loop = loop
    self.producer = aiokafka.AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=bootstrap_servers)
    self.topic = topic
    self.running = True
    # executor = ProcessPoolExecutor()
    self.task = loop.run_in_executor(executor=None, func=self._flush_measurements)
    self.frequency = datetime.timedelta(milliseconds=frequency_ms)
    self.buffer = b''
def on_init(self) -> None:
    transport = cast(Transport, self.transport)
    conf = transport.app.conf
    self._producer = aiokafka.AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=server_list(transport.url, transport.default_port),
        client_id=conf.broker_client_id,
        acks=self.acks,
        linger_ms=self.linger_ms,
        max_batch_size=self.max_batch_size,
        max_request_size=self.max_request_size,
        compression_type=self.compression_type,
        on_irrecoverable_error=self._on_irrecoverable_error,
        security_protocol="SSL" if conf.ssl_context else "PLAINTEXT",
        ssl_context=conf.ssl_context,
    )
async def main(group_id_id):
    asyncio.create_task(start_eldm())

    consumer = memconsumer.AIOKafkaMemConsumer(
        group_id="events_cons",
        mem_unique_id=group_id_id,
        auto_offset_reset="earliest",
        topic="events",
    )
    await consumer.start()

    producer = aiokafka.AIOKafkaProducer()
    await producer.start()

    mem = consumer.get_mem()

    async def handle_msg(msg):
        log.info(f" Handling msg: {msg.offset}-->{msg.key}:{msg.value}.")
        # The key in mem[key] should be the message key; that is how we align mem
        # data with source topic data. This is also checked in the
        # AIOKafkaMemConsumer. There are other solutions to this, but for now we
        # have only implemented this one. We also work under the assumption that
        # the source topic doesn't do any custom partition assignments.
        val = mem[msg.key.decode()]
        new_val = int(msg.value.decode())
        if (
            new_val not in val
        ):  # make idempotent: if the mem-update commit happens but the read-value commit doesn't, don't crash
            val.append(new_val)
            setitem_info = await mem.setitem(
                msg.key.decode(), val)  # <------ UGLY but needed (for now)
        await producer.send("event-lists", value=json.dumps(val).encode(), key=msg.key)
        log.info(f" Done handling msg: {msg.value}")

    try:
        async for msg in consumer.items():
            await handle_msg(msg)
            await consumer.commit()  # this commit should happen after the mem.setitem
    finally:
        print("stopping consumer/producer")
        await consumer.stop()
        await producer.stop()
        print("done")
def on_init(self) -> None:
    transport = cast(Transport, self.transport)
    self._producer = aiokafka.AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=server_list(transport.url, transport.default_port),
        client_id=self.client_id,
        acks=self.acks,
        linger_ms=self.linger_ms,
        max_batch_size=self.max_batch_size,
        max_request_size=self.max_request_size,
        compression_type=self.compression_type,
        on_irrecoverable_error=self._on_irrecoverable_error,
        security_protocol='SSL' if self.ssl_context else 'PLAINTEXT',
        ssl_context=self.ssl_context,
        partitioner=self.partitioner or DefaultPartitioner(),
    )
def __init__(self):
    super().__init__()

    cfg = configparser.ConfigParser()
    cfg.read('./production-site.conf')

    # Initialize MongoDBClient
    self.MongoDBClient = motor.motor_asyncio.AsyncIOMotorClient(
        host=cfg.get('mongodb', 'url'),
        port=27017,
        driver=pymongo.driver_info.DriverInfo(
            name="rattlepy.MongoDBClient", platform="rattlepy"),
        io_loop=self.Loop)

    # Initialize KafkaProducer
    self.KafkaProducer = aiokafka.AIOKafkaProducer(
        bootstrap_servers=cfg.get('kafka', 'url'),
        loop=self.Loop)
def producer(
    bootstrap_servers,
    loop=loop,
    request_timeout_ms=10000,
    connections_max_idle_ms=None,
    name="writer",
):
    """
    producer returns a wrapped kafka producer that will reconnect.
    """
    return ReconnectingClient(
        aiokafka.AIOKafkaProducer(
            bootstrap_servers=bootstrap_servers,
            loop=loop,
            request_timeout_ms=request_timeout_ms,
            connections_max_idle_ms=connections_max_idle_ms,
        ),
        name,
    )
async def main(delay_ms, red_paint_key, quiet):
    consumer = aiokafka.AIOKafkaConsumer(
        group_id="red_paint",
        enable_auto_commit=True,
        auto_offset_reset="earliest",
    )
    await consumer.start()
    consumer.subscribe(["event-lists"])

    producer = aiokafka.AIOKafkaProducer()
    await producer.start()

    delay_s = delay_ms / 1000
    paint = RedPaint(keys=red_paint_key, delay=delay_s, quiet=quiet)

    try:
        # start the task sending events
        send_task = asyncio.create_task(send_paint(paint, producer))
        # start the task receiving events
        recv_task = asyncio.create_task(recv_paint(paint, consumer, delay_s * 10))
        # wait for both tasks
        await asyncio.gather(send_task, recv_task, return_exceptions=False)
    finally:
        await consumer.stop()
        await producer.stop()
def _init_producer(self) -> None:
    self._producer = aiokafka.AIOKafkaProducer(**self.producer_options)
def __init__(self, url, **kwargs):
    loop = asyncio.get_running_loop()
    self._producer = aiokafka.AIOKafkaProducer(
        loop=loop, bootstrap_servers=url, **kwargs)
    self._start_task = None
import asyncio
import logging
import os

import aiokafka
from random import randint
from kafka import TopicPartition

KAFKA_TOPIC = os.getenv('KAFKA_TOPIC', "URL")
KAFKA_CONSUMER_GROUP_PREFIX = os.getenv('KAFKA_CONSUMER_GROUP_PREFIX', 'url-group')
KAFKA_BOOTSTRAP_SERVERS = os.getenv('KAFKA_BOOTSTRAP_SERVERS', '127.0.0.1:9093')

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
                    level=logging.INFO)
log = logging.getLogger(__name__)

loop = asyncio.get_event_loop()

aioproducer = aiokafka.AIOKafkaProducer(
    loop=loop, bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)

consumer = None
consumer_task = None


def get_producer() -> aiokafka.AIOKafkaProducer:
    global aioproducer
    return aioproducer


async def initialize():
    log.debug("Initializing the kafka consumer....")
    global consumer
    global loop
async def update(self, topic_partitions):
    self._snapshot()
    self._mem = defaultdict(list)
    # Because all on_partitions_revoked callbacks have been called, nothing is
    # being produced to the topic anymore. That means we can add tokens that we
    # can read back to see we are fully up to date.
    log.info(
        f"Updating mem for partitions {sorted(p.partition for p in topic_partitions)} : {topic_partitions}"
    )
    if not topic_partitions:
        log.info("no partitions so we don't do anything with mem.")
        return
    if len(set(p.topic for p in topic_partitions)) > 1:
        raise Exception("boom: multiple topics reassigned, unexpected")

    partitions_done = {p.partition: False for p in topic_partitions}
    uptodate_token = uuid.uuid4().bytes

    # send a token into every partition
    token_producer = aiokafka.AIOKafkaProducer()
    await token_producer.start()
    for topic_partition in topic_partitions:
        await token_producer.send(
            self.memupdate_topicname,
            partition=topic_partition.partition,
            key="token".encode(),  # to allow for compaction; we don't need earlier uptodate_tokens
            value=uptodate_token)
    await token_producer.stop()
    log.debug("produced tokens to mem-updater topic.")

    memupdater_consumer = aiokafka.AIOKafkaConsumer(
        group_id=f"mem-updater-{self.group_id_id}",
        auto_offset_reset='earliest',
    )
    await memupdater_consumer.start()
    parts = [
        TopicPartition(self.memupdate_topicname, t.partition)
        for t in topic_partitions
    ]
    memupdater_consumer.assign(partitions=parts)
    await memupdater_consumer.seek_to_beginning(*parts)

    while any(not partitions_done[p.partition] for p in topic_partitions):
        log.debug(
            f"getting msg from mem-updater topic, state: {partitions_done}.")
        msg = await memupdater_consumer.getone()
        log.debug(f"msg {msg} in partition {msg.partition}")
        if msg.value == uptodate_token:
            log.debug("msg was current uptodate_token.")
            partitions_done[msg.partition] = True
        else:
            try:
                headers = {
                    k: v.decode() for k, v in msg.headers
                }  # not yet used, but for tracing / types / updates
                key = msg.key.decode()
                value = json.loads(msg.value.decode())
                self._setitem(key, value)
                log.debug(f"Mem updated: {key} with value: {value}")
            except UnicodeDecodeError as e:
                log.debug(
                    f"Expect decode errors here {e}: {msg} should be an outdated uptodate_token."
                )

    await memupdater_consumer.stop()
    self._check_snapshot_consistency()
    log.info(f"Mem update complete {self}.")
def __init__(self, dsn: str, logger: logging.Logger = None) -> None:
    super().__init__(dsn, logger)
    _loop = asyncio.get_running_loop()
    self.addr = urllib.parse.urlparse(dsn).netloc  # type: ignore
    self.producer = aiokafka.AIOKafkaProducer(
        loop=_loop, bootstrap_servers=self.addr)