예제 #1
0
def get_message_batch_cudf(kafka_params, topic, partition, keys, low, high, timeout=None): # pragma: no cover
    """
    Read a batch of kafka messages from one topic/partition into a cudf
    dataframe (currently, messages must be in JSON format).

    A custreamz consumer is built from ``kafka_params``, asked to read the
    range ``start=low`` .. ``end=high + 1`` and closed again whether or not
    the read succeeds. The result of ``read_gdf`` is returned unchanged.

    ``keys`` and ``timeout`` are accepted but not used by this function.
    """
    from custreamz import kafka

    reader = kafka.Consumer(kafka_params)
    try:
        return reader.read_gdf(
            topic=topic,
            partition=partition,
            lines=True,
            start=low,
            end=high + 1,
        )
    finally:
        # Release the consumer even when read_gdf raises.
        reader.close()
예제 #2
0
    def start(self):
        """
        Create the Kafka consumer and schedule polling, if currently stopped.

        The consumer class comes from custreamz when ``self.engine`` is
        ``"cudf"`` and from confluent_kafka otherwise. Nothing beyond the
        imports happens when ``self.stopped`` is false.
        """
        import confluent_kafka as ck
        use_cudf = self.engine == "cudf"
        if use_cudf:  # pragma: no cover
            from custreamz import kafka

        if not self.stopped:
            return

        if use_cudf:  # pragma: no cover
            consumer_factory = kafka.Consumer
        else:
            consumer_factory = ck.Consumer
        self.consumer = consumer_factory(self.consumer_params)
        self.stopped = False

        # blocks for consumer thread to come up
        first_partition = ck.TopicPartition(self.topic, 0, 0)
        self.consumer.get_watermark_offsets(first_partition)
        self.loop.add_callback(self.poll_kafka)
예제 #3
0
    def start(self):
        """
        Create the Kafka consumer and schedule polling, if currently stopped.

        The consumer class comes from custreamz when ``self.engine`` is
        ``"cudf"`` and from confluent_kafka otherwise. A finalizer is
        registered on this object that hands the consumer to
        ``_close_consumer``. Nothing beyond the imports happens when
        ``self.stopped`` is false.
        """
        import confluent_kafka as ck
        use_cudf = self.engine == "cudf"
        if use_cudf:  # pragma: no cover
            from custreamz import kafka

        if not self.stopped:
            return

        if use_cudf:  # pragma: no cover
            consumer_factory = kafka.Consumer
        else:
            consumer_factory = ck.Consumer
        self.consumer = consumer_factory(self.consumer_params)
        # The consumer is bound as a default argument so the finalizer does
        # not need to reference ``self``.
        weakref.finalize(self, lambda c=self.consumer: _close_consumer(c))
        self.stopped = False

        # blocks for consumer thread to come up and invoke poll to establish
        # connection with broker to fetch oauth token for kafka
        first_partition = ck.TopicPartition(self.topic, 0, 0)
        self.consumer.poll(timeout=1)
        self.consumer.get_watermark_offsets(first_partition)
        self.loop.add_callback(self.poll_kafka)
예제 #4
0
def kafka_client():
    """
    Build a Kafka consumer pointed at the broker on ``localhost:9092``.

    The broker is probed with a plain TCP connection first; when nothing is
    reachable there, ``pytest.skip`` is called instead of creating the
    consumer.

    Returns the ``kafka.Consumer`` created from the test configuration.
    """
    # Check for the existence of a kafka broker. connect_ex() reports a
    # refused/unreachable connection through its return code rather than by
    # raising, so the code is inspected explicitly. The ``with`` block closes
    # the probe socket on every path, including the failure one.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            broker_up = probe.connect_ex(("localhost", 9092)) == 0
    except OSError:
        # connect_ex() can still raise for problems outside the C-level
        # connect() call (e.g. host lookup); treat that as "no broker".
        broker_up = False

    if not broker_up:
        pytest.skip(
            "A running Kafka instance must be available to run these tests")

    kafka_configs = {
        "metadata.broker.list": "localhost:9092",
        "enable.partition.eof": "true",
        "group.id": "groupid",
        "auto.offset.reset": "earliest",
        "enable.auto.commit": "false",
    }

    return kafka.Consumer(kafka_configs)