Example #1
async def produce_for_topic(*, loop, producer_settings, topic_name, schema,
                            schema_id, period):
    logger = structlog.get_logger().bind(topic=topic_name)

    # Preparse schema
    schema = fastavro.parse_schema(schema)
    logger.info('Preparsed schema')

    # Start up the producer
    producer = aiokafka.AIOKafkaProducer(loop=loop, **producer_settings)
    await producer.start()
    logger.info('Started producer')

    # Generate and write messages
    try:
        for message in generate_message(schema):
            logger.debug('New message', message=message)
            message_fh = BytesIO()
            message_fh.write(struct.pack('>bI', MAGIC_BYTE, schema_id))
            fastavro.schemaless_writer(
                message_fh,
                schema,
                message
            )
            message_fh.seek(0)
            await producer.send_and_wait(
                topic_name, value=message_fh.read())
            # increment prometheus production counter
            PRODUCED.inc()
            logger.debug('Sent message')
            # naive message period; need to correct for production time
            await asyncio.sleep(period)
    finally:
        await producer.stop()
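A note on the framing above: each payload uses the Confluent-style wire format, i.e. a magic byte, a big-endian 4-byte schema id, and then the schemaless Avro body. A hypothetical consumer-side counterpart (the function name, and passing in an already-parsed schema, are assumptions for illustration) could unpack it like this:

import struct
from io import BytesIO

import fastavro

def decode_wire_message(raw: bytes, schema) -> dict:
    fh = BytesIO(raw)
    # strip the MAGIC_BYTE and schema id prefix written by produce_for_topic
    magic, schema_id = struct.unpack('>bI', fh.read(5))
    return fastavro.schemaless_reader(fh, schema)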
Example #2
    async def startup(self, state: State) -> State:
        self.producer = aiokafka.AIOKafkaProducer(
            bootstrap_servers=state.settings.kafka_servers)

        await self.producer.start()
        state.producer = self.producer
        return state
Example #3
 async def create_producer(self, **kwargs):
     producer = aiokafka.AIOKafkaProducer(
         loop=self.Loop,
         bootstrap_servers=self.get_bootstrap_servers(),
         compression_type=self.get_compression(),
         **kwargs)
     return producer
Example #4
async def kafka_producer_factory(config):
    if config["ssl_context"]:
        config = dict(config,
                      ssl_context=create_ssl_context(**config["ssl_context"]))
    producer = aiokafka.AIOKafkaProducer(**config)
    await producer.start()
    return producer
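A sketch of the kind of config dict this factory might receive; the nested "ssl_context" mapping is assumed to hold keyword arguments for aiokafka.helpers.create_ssl_context, and all values below are placeholders:

config = {
    "bootstrap_servers": "localhost:9092",
    "security_protocol": "SSL",
    "ssl_context": {
        "cafile": "/etc/kafka/ca.pem",
        "certfile": "/etc/kafka/client.pem",
        "keyfile": "/etc/kafka/client.key",
    },
}
producer = await kafka_producer_factory(config)  # from an async context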
Example #5
async def main(source_topic, sink_topic):
    consumer = aiokafka.AIOKafkaConsumer(
        group_id="events_cons_explain",
        enable_auto_commit=False,
        auto_offset_reset="earliest",
    )
    await consumer.start()
    consumer.subscribe([source_topic])

    producer = aiokafka.AIOKafkaProducer()
    await producer.start()

    mem = defaultdict(list)

    async def handle_msg(msg):
        val = mem[msg.key.decode()]
        new_val = int(msg.value.decode())
        if (
                new_val not in val
        ):  # make idempotent; see the time gap between setting this and the consumer.commit
            val.append(new_val)
        print(f"Out value {msg.key.decode()} -> {shortlist(val)}")
        await producer.send(sink_topic,
                            value=json.dumps(val).encode(),
                            key=msg.key)

    try:
        async for msg in consumer:
            print(f"In value {msg.key.decode()} -> {msg.value.decode()}")
            await handle_msg(msg)
            await consumer.commit()

    finally:
        await consumer.stop()
        await producer.stop()
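The shortlist helper is not shown in this example; a plausible stand-in, assumed here purely for illustration, abbreviates long lists when printing:

def shortlist(values, head=3, tail=3):
    # show short lists in full, otherwise only the first and last few elements
    if len(values) <= head + tail:
        return values
    return values[:head] + ['...'] + values[-tail:]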
Example #6
 async def _get_producer(self) -> aiokafka.AIOKafkaProducer:
     if self._producer is None:
         self._producer = aiokafka.AIOKafkaProducer(
             bootstrap_servers=cast(List[str], self._kafka_servers),
             loop=asyncio.get_event_loop(),
             api_version=self._kafka_api_version,
         )
         await self._producer.start()
     return self._producer
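A hypothetical call site for the lazy getter above (the send_event name and its arguments are illustrative, not part of the original class):

async def send_event(self, topic: str, payload: bytes) -> None:
    # the first call starts the producer; subsequent calls reuse it
    producer = await self._get_producer()
    await producer.send_and_wait(topic, payload)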
Example #7
 async def _connection(self):
     producer = aiokafka.AIOKafkaProducer(
         loop=self.Loop, bootstrap_servers=self.get_bootstrap_servers())
     try:
         await producer.start()
         await self._loader(producer=producer)
     except BaseException as e:
         L.exception("Unexpected Kafka Error.")
     finally:
         await producer.stop()
 async def create_producer(self, **kwargs):
     producer = aiokafka.AIOKafkaProducer(
         loop=self.Loop,
         bootstrap_servers=self.get_bootstrap_servers(),
         compression_type=self.get_compression(),
         security_protocol=self.Config.get('security_protocol'),
         sasl_mechanism=self.Config.get('sasl_mechanism'),
         sasl_plain_username=self.Config.get('sasl_plain_username') or None,
         sasl_plain_password=self.Config.get('sasl_plain_password') or None,
         **kwargs)
     return producer
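The security settings are read from self.Config; a hypothetical set of values for a SASL/PLAIN-over-TLS cluster (all placeholders) would look something like:

config = {
    'security_protocol': 'SASL_SSL',
    'sasl_mechanism': 'PLAIN',
    'sasl_plain_username': 'service-account',
    'sasl_plain_password': 'change-me',
}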
Example #9
 async def _get_producer(self, loop):
     """Inits kafka producer."""
     kwargs = {'loop': loop, 'bootstrap_servers': self.config.kafka.hosts}
     ssl_context = self.config.kafka.ssl_context()
     if ssl_context:
         kwargs['ssl_context'] = ssl_context
         kwargs['security_protocol'] = 'SSL'
     producer = aiokafka.AIOKafkaProducer(
         **kwargs)
     await producer.start()
     return producer
Example #10
 def producer_factory(self) -> aiokafka.AIOKafkaProducer:
     return aiokafka.AIOKafkaProducer(
         bootstrap_servers=cast(List[str], self._kafka_servers),
         loop=asyncio.get_event_loop(),
         api_version=self._kafka_api_version,
         **{
             k: v
             for k, v in self.kafka_settings.items()
             if k in _aiokafka_producer_settings
         },
     )
Example #11
    async def send(self, name: str, message: str, **kwargs) -> None:
        await self.producer.start()

        try:
            await self.producer.send_and_wait(name, bytes(message, 'utf8'),
                                              **kwargs)

        finally:
            await self.producer.stop()
            _loop = asyncio.get_running_loop()
            self.producer = aiokafka.AIOKafkaProducer(
                loop=_loop, bootstrap_servers=self.addr)
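A minimal driver for the pattern above; MessageBus is a hypothetical stand-in for the class that owns self.producer and self.addr, which the snippet does not show:

import asyncio

async def main() -> None:
    bus = MessageBus('localhost:9092')    # hypothetical owner of send()
    await bus.send('greetings', 'hello')  # start, send_and_wait, stop, recreate

asyncio.run(main())

Recreating the producer after every call keeps send() self-contained, at the cost of a full connection handshake per message.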
Example #12
 def __init__(self, group_id_id, memupdate_topicname="mem-updater"):
     # We should check here that the topic exists and that it has the right number of partitions,
     # and maybe even that a written item ends up in the expected partition.
     self.group_id_id = group_id_id
     self._mem: Dict[str, List[int]] = defaultdict(list)
     self.memupdate_topicname = memupdate_topicname
     self.producer = aiokafka.AIOKafkaProducer()
     self.snapshots = [
         {}
     ]  # start with empty snapshot so that below we can assume there always is a previous
     self.started = False
     self._current_key = None
Example #13
async def main():
    loop = asyncio.get_event_loop()
    producer = aiokafka.AIOKafkaProducer(loop=loop,
                                         bootstrap_servers='128.0.255.10:9092')
    await producer.start()
    try:
        result = await producer.send_and_wait("my_topic",
                                              key=b"test",
                                              value=b"hello,world!")
        print(result)
        print(result.offset)
    finally:
        await producer.stop()
async def main(config):
    """Record website availability results to kafka"""
    producer = aiokafka.AIOKafkaProducer(value_serializer=json_serialize,
                                         **config.KAFKA_OPTS)

    await producer.start()
    try:
        while True:
            for url, pattern in config.URLS:
                asyncio.create_task(check_url(config, producer, url, pattern))
            await asyncio.sleep(config.CHECK_FREQUENCY)
    finally:
        await producer.stop()
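json_serialize is referenced above but not defined in the snippet; a minimal serializer of the shape AIOKafkaProducer's value_serializer expects (take the value, return bytes) might be:

import json

def json_serialize(value) -> bytes:
    # applied by aiokafka to every value passed to producer.send()
    return json.dumps(value).encode('utf-8')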
Example #15
async def produce_for_simple_topic(*, loop, httpsession, producer_settings,
                                   schema_registry_url, topic_name, period):
    logger = structlog.get_logger(__name__).bind(
        role='producer',
        topic=topic_name,
    )

    logger.info('Getting schemas')
    schema_uri = URITemplate(schema_registry_url +
                             '/subjects{/subject}/versions/latest')
    headers = {'Accept': 'application/vnd.schemaregistry.v1+json'}

    # Get key schema
    r = await httpsession.get(schema_uri.expand(subject=topic_name + '-key'),
                              headers=headers)
    data = await r.json()
    key_schema = fastavro.parse_schema(json.loads(data['schema']))

    # Get value schema
    r = await httpsession.get(schema_uri.expand(subject=topic_name + '-value'),
                              headers=headers)
    data = await r.json()
    value_schema = fastavro.parse_schema(json.loads(data['schema']))

    default_key_fh = BytesIO()
    fastavro.schemaless_writer(default_key_fh, key_schema, {})
    default_key_fh.seek(0)
    default_key = default_key_fh.read()

    # Set up producer
    producer = aiokafka.AIOKafkaProducer(loop=loop, **producer_settings)
    await producer.start()
    logger.info('Started producer')

    try:
        while True:
            message_fh = BytesIO()
            fastavro.schemaless_writer(
                message_fh, value_schema,
                {'timestamp': datetime.datetime.now(datetime.timezone.utc)})
            message_fh.seek(0)
            # May want to adjust this to control batching latency
            await producer.send_and_wait(topic_name,
                                         key=default_key,
                                         value=message_fh.read())
            PRODUCED.inc()  # increment prometheus production counter
            logger.debug('Sent message')
            # naive message period; need to correct for production time
            await asyncio.sleep(period)
    finally:
        await producer.stop()
Example #16
 def on_init(self) -> None:
     transport = cast(Transport, self.transport)
     self._producer = aiokafka.AIOKafkaProducer(
         loop=self.loop,
         bootstrap_servers=server_list(transport.url,
                                       transport.default_port),
         client_id=transport.app.conf.broker_client_id,
         acks=self.acks,
         linger_ms=self.linger_ms,
         max_batch_size=self.max_batch_size,
         max_request_size=self.max_request_size,
         compression_type=self.compression_type,
         on_irrecoverable_error=self._on_irrecoverable_error,
     )
Example #17
async def kafka_gateway(request_chan: ac.Chan) -> None:
    loop = asyncio.get_running_loop()
    producer = aiokafka.AIOKafkaProducer(loop=loop,
                                         bootstrap_servers='kafka:9092')
    await producer.start()

    response_chan = ac.Chan()
    consumer_task = ac.go(consume(response_chan))

    pending_responses = {}

    try:
        key_seq = 1
        while True:
            result, chan = await ac.select(request_chan, response_chan)
            if result is None:
                break

            key: str
            msg: str
            resp_chan: ac.Chan
            if chan is request_chan:
                msg, resp_chan = result
                key = f'msg{key_seq}'
                key_seq += 1
                logging.info(f"Requesting salutation {key}, {msg}")
                await producer.send_and_wait("salutation-requests",
                                             key=key.encode('utf8'),
                                             value=msg.encode('utf8'))
                logging.info("Message sent")
                await producer.flush()
                pending_responses[key] = resp_chan
            elif chan is response_chan:
                key_bytes, msg_bytes = result
                key, msg = key_bytes.decode('utf8'), msg_bytes.decode('utf8')
                if key in pending_responses:
                    resp_chan = pending_responses[key]
                    del pending_responses[key]
                    await resp_chan.put(msg)
                else:
                    logging.error(f"Message key '{key}' not awaiting response")
    except asyncio.CancelledError:
        logging.info("Gateway cancelled")
    finally:
        consumer_task.cancel()
        await asyncio.gather(consumer_task)

        logging.info("Stopping producer")
        await producer.stop()
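The consume coroutine started with ac.go(...) is not shown; a rough sketch, assuming it reads a 'salutation-responses' topic (a hypothetical name) and pushes (key, value) byte pairs onto the channel used above:

async def consume(response_chan: ac.Chan) -> None:
    consumer = aiokafka.AIOKafkaConsumer('salutation-responses',
                                         bootstrap_servers='kafka:9092')
    await consumer.start()
    try:
        async for msg in consumer:
            await response_chan.put((msg.key, msg.value))
    finally:
        await consumer.stop()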
Example #18
 def __init__(self, config: Config, logger: logging.Logger,
              event_loop: asyncio.AbstractEventLoop, queue: asyncio.Queue):
     super().__init__(config, logger, event_loop, queue)
     context = create_ssl_context(
         cafile=self.config.kafka.cafile,
         certfile=self.config.kafka.cert,
         keyfile=self.config.kafka.key,
         password=self.config.kafka.passwd,
     )
     self.producer = aiokafka.AIOKafkaProducer(
         loop=self.loop,
         bootstrap_servers=self.config.kafka.servers,
         security_protocol="SSL",
         ssl_context=context,
     )
Example #19
 def __init__(self,
              loop: AbstractEventLoop,
              topic,
              bootstrap_servers,
              frequency_ms=1000 / 60):
     self.loop = loop
     self.producer = aiokafka.AIOKafkaProducer(
         loop=self.loop, bootstrap_servers=bootstrap_servers)
     self.topic = topic
     self.running = True
     # executor = ProcessPoolExecutor()
     self.task = loop.run_in_executor(executor=None,
                                      func=self._flush_measurements)
     self.frequency = datetime.timedelta(milliseconds=frequency_ms)
     self.buffer = b''
Example #20
 def on_init(self) -> None:
     transport = cast(Transport, self.transport)
     conf = transport.app.conf
     self._producer = aiokafka.AIOKafkaProducer(
         loop=self.loop,
         bootstrap_servers=server_list(transport.url,
                                       transport.default_port),
         client_id=conf.broker_client_id,
         acks=self.acks,
         linger_ms=self.linger_ms,
         max_batch_size=self.max_batch_size,
         max_request_size=self.max_request_size,
         compression_type=self.compression_type,
         on_irrecoverable_error=self._on_irrecoverable_error,
         security_protocol="SSL" if conf.ssl_context else "PLAINTEXT",
         ssl_context=conf.ssl_context,
     )
Example #21
async def main(group_id_id):
    asyncio.create_task(start_eldm())

    consumer = memconsumer.AIOKafkaMemConsumer(
        group_id="events_cons",
        mem_unique_id=group_id_id,
        auto_offset_reset="earliest",
        topic="events",
    )
    await consumer.start()

    producer = aiokafka.AIOKafkaProducer()
    await producer.start()

    mem = consumer.get_mem()

    async def handle_msg(msg):
        log.info(f"  Handling msg: {msg.offset}-->{msg.key}:{msg.value}.")
        # The key in mem[key] should be the message key; that is how we align mem data with the
        # source topic data. This is also checked in the AIOKafkaMemConsumer. There are other
        # solutions to this, but for now we have only implemented this one. We also work under
        # the assumption that the source topic doesn't do any custom partition assignment.
        val = mem[msg.key.decode()]
        new_val = int(msg.value.decode())
        if (
                new_val not in val
        ):  # make idempotent: if the mem-update commit happened but the offset commit for the read value did not, don't crash on replay
            val.append(new_val)
        setitem_info = await mem.setitem(msg.key.decode(), val
                                         )  # <------ UGLY but needed (for now)
        await producer.send("event-lists",
                            value=json.dumps(val).encode(),
                            key=msg.key)
        log.info(f"  Done handling msg: {msg.value}")

    try:
        async for msg in consumer.items():
            await handle_msg(msg)
            await consumer.commit(
            )  # this commit should happen after the mem.setitem

    finally:
        print("stopping consumer/producer")
        await consumer.stop()
        await producer.stop()
        print("done")
Example #22
 def on_init(self) -> None:
     transport = cast(Transport, self.transport)
     self._producer = aiokafka.AIOKafkaProducer(
         loop=self.loop,
         bootstrap_servers=server_list(
             transport.url, transport.default_port),
         client_id=self.client_id,
         acks=self.acks,
         linger_ms=self.linger_ms,
         max_batch_size=self.max_batch_size,
         max_request_size=self.max_request_size,
         compression_type=self.compression_type,
         on_irrecoverable_error=self._on_irrecoverable_error,
         security_protocol='SSL' if self.ssl_context else 'PLAINTEXT',
         ssl_context=self.ssl_context,
         partitioner=self.partitioner or DefaultPartitioner(),
     )
Example #23
    def __init__(self):
        super().__init__()

        cfg = configparser.ConfigParser()
        cfg.read('./production-site.conf')

        # Initialize MongoDBClient
        self.MongoDBClient = motor.motor_asyncio.AsyncIOMotorClient(
            host=cfg.get('mongodb', 'url'),
            port=27017,
            driver=pymongo.driver_info.DriverInfo(
                name="rattlepy.MongoDBClient", platform="rattlepy"),
            io_loop=self.Loop)

        # Initialize KafkaProducer
        self.KafkaProducer = aiokafka.AIOKafkaProducer(
            bootstrap_servers=cfg.get('kafka', 'url'), loop=self.Loop)
Example #24
def producer(
    bootstrap_servers,
    loop=loop,
    request_timeout_ms=10000,
    connections_max_idle_ms=None,
    name="writer",
):
    """
    producer returns a wrapped kafka producer that will reconnect.
    """
    return ReconnectingClient(
        aiokafka.AIOKafkaProducer(
            bootstrap_servers=bootstrap_servers,
            loop=loop,
            request_timeout_ms=request_timeout_ms,
            connections_max_idle_ms=connections_max_idle_ms,
        ),
        name,
    )
Example #25
async def main(delay_ms, red_paint_key, quiet):
    consumer = aiokafka.AIOKafkaConsumer(
        group_id="red_paint", enable_auto_commit=True, auto_offset_reset="earliest"
    )
    await consumer.start()
    consumer.subscribe(["event-lists"])
    producer = aiokafka.AIOKafkaProducer()
    await producer.start()
    delay_s = delay_ms / 1000
    paint = RedPaint(keys=red_paint_key, delay=delay_s, quiet=quiet)

    try:
        # start Task sending events
        send_task = asyncio.create_task(send_paint(paint, producer))
        # start Task receiving events
        recv_task = asyncio.create_task(recv_paint(paint, consumer, delay_s * 10))

        # wait for tasks
        await asyncio.gather(send_task, recv_task, return_exceptions=False)
    finally:
        await consumer.stop()
        await producer.stop()
Example #26
 def _init_producer(self) -> None:
     self._producer = aiokafka.AIOKafkaProducer(**(self.producer_options))
Example #27
 def __init__(self, url, **kwargs):
     loop = asyncio.get_running_loop()
     self._producer = aiokafka.AIOKafkaProducer(loop=loop,
                                                bootstrap_servers=url,
                                                **kwargs)
     self._start_task = None
Example #28
import asyncio
import logging
import os

import aiokafka

from random import randint
from kafka import TopicPartition

KAFKA_TOPIC = os.getenv('KAFKA_TOPIC', "URL")
KAFKA_CONSUMER_GROUP_PREFIX = os.getenv('KAFKA_CONSUMER_GROUP_PREFIX',
                                        'url-group')
KAFKA_BOOTSTRAP_SERVERS = os.getenv('KAFKA_BOOTSTRAP_SERVERS',
                                    '127.0.0.1:9093')

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
                    level=logging.INFO)
log = logging.getLogger(__name__)
loop = asyncio.get_event_loop()
aioproducer = aiokafka.AIOKafkaProducer(
    loop=loop, bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)
consumer = None
consumer_task = None


def get_producer() -> aiokafka.AIOKafkaProducer:
    global aioproducer
    return aioproducer


async def initialize():
    log.debug("Initializing the kafka consumer....")

    global consumer
    global loop
Example #29
    async def update(self, topic_partitions):
        self._snapshot()
        self._mem = defaultdict(list)
        # Because all on_partitions_revoked callbacks have been called, nothing is being produced
        # to the topic anymore; that means we can write tokens that we read back to confirm we are
        # fully up to date.
        log.info(
            f"Updating mem for partitions {sorted(p.partition for p in topic_partitions)} : {topic_partitions}"
        )
        if not topic_partitions:
            log.info("no partitions so we don't do anything with mem.")
            return

        if len(set(p.topic for p in topic_partitions)) > 1:
            raise Exception("boom: multiple topics reassinged, unexpected")

        partitions_done = {p.partition: False for p in topic_partitions}

        uptodate_token = uuid.uuid4().bytes

        # send token into all partitions
        token_producer = aiokafka.AIOKafkaProducer()
        await token_producer.start()
        for topic_partition in topic_partitions:
            await token_producer.send(
                self.memupdate_topicname,
                partition=topic_partition.partition,
                key="token".encode(
                ),  # to allow for compaction, don't need earlier uptodate_token
                value=uptodate_token)
        await token_producer.stop()
        log.debug("produced tokens to mem-updater topic.")

        memupdater_consumer = aiokafka.AIOKafkaConsumer(
            group_id=f"mem-updater-{self.group_id_id}",
            auto_offset_reset='earliest',
        )

        await memupdater_consumer.start()
        parts = [
            TopicPartition(self.memupdate_topicname, t.partition)
            for t in topic_partitions
        ]
        memupdater_consumer.assign(parts)
        await memupdater_consumer.seek_to_beginning(*parts)
        while any(not partitions_done[p.partition] for p in topic_partitions):
            log.debug(
                f"getting msg from mem-updater topic, state: {partitions_done}."
            )
            msg = await memupdater_consumer.getone()
            log.debug(f"msg {msg} in partition {msg.partition}")
            if msg.value == uptodate_token:
                log.debug("msg was current uptodate_token.")
                partitions_done[msg.partition] = True
            else:
                try:
                    headers = {
                        k: v.decode()
                        for k, v in msg.headers
                    }  # not yet used, but for tracing / types / updates
                    key = msg.key.decode()
                    value = json.loads(msg.value.decode())
                    self._setitem(key, value)
                    log.debug(f"Mem updated: {key} with value: {value}")
                except UnicodeDecodeError as e:
                    log.debug(
                        f"Expect decode errors here {e}: {msg} should be outdated uptodate_token."
                    )

        await memupdater_consumer.stop()
        self._check_snapshot_consistency()
        log.info(f"Mem update complete {self}.")
Example #30
 def __init__(self, dsn: str, logger: logging.Logger = None) -> None:
     super().__init__(dsn, logger)
     _loop = asyncio.get_running_loop()
     self.addr = urllib.parse.urlparse(dsn).netloc  # type: ignore
     self.producer = aiokafka.AIOKafkaProducer(loop=_loop,
                                               bootstrap_servers=self.addr)