class KafkaWorker(BaseWorker):
    """Worker that consumes Avro-encoded messages from a single Kafka topic.

    Lifecycle (driven by ``BaseWorker``): ``setup`` builds the consumer and
    subscribes, ``handle`` is invoked repeatedly to poll and dispatch one
    message, ``teardown`` closes the consumer.  Subclasses must set
    ``topic_name`` and typically override ``consume_message``.
    """

    # Topic to subscribe to; ``get_topic_name`` reports a config error when unset.
    topic_name = None
    # Consumer group id; a random one is generated when unset.
    consumer_name = None
    # Subclass overrides merged over the defaults in ``get_consumer_settings``.
    # NOTE(review): class-level mutable dict — shared by all instances; it is
    # only read here, never mutated.
    consumer_settings = {}
    # When True, ``handle`` commits offsets after each consumed message.
    commit_on_complete = False
    # Use non-blocking (asynchronous) offset commits.
    async_commit = True
    # Timeout (seconds) for each ``Consumer.poll``; 0 returns immediately.
    poll_timeout = 0
    # Start position when the group has no committed offset.
    auto_offset_reset = 'earliest'
    consumer = None      # AvroConsumer instance, created in ``setup``
    last_message = None  # most recent non-None message returned by ``poll``

    def setup(self):
        # Build the consumer and subscribe to the configured topic.
        self.consumer = AvroConsumer(self.get_consumer_settings())
        self.consumer.subscribe([self.get_topic_name()])

    def teardown(self):
        # Close cleanly so the group coordinator can rebalance promptly.
        if self.consumer:
            self.consumer.close()

    def get_topic_name(self):
        # Surface a "missing config" error when no topic is configured.
        return self.topic_name or utils.config_missing('topic name')

    def get_consumer_name(self):
        return self.consumer_name or utils.generate_random_consumer_name()

    def get_consumer_settings(self):
        """Return client settings: library defaults overlaid with ``consumer_settings``."""
        default_settings = {
            'group.id': self.get_consumer_name(),
            'default.topic.config': {'auto.offset.reset': self.auto_offset_reset},
            # Offsets are committed explicitly via ``commit`` below.
            'enable.auto.commit': False,
            'bootstrap.servers': utils.get_broker_url(),
            'schema.registry.url': utils.get_schema_registry_url(),
            'session.timeout.ms': 10000,
            'heartbeat.interval.ms': 1000,
            'api.version.request': True,
        }
        return utils.generate_client_settings(default_settings, self.consumer_settings)

    def poll(self):
        """Poll once; remember and return the message (None on timeout)."""
        message = self.consumer.poll(timeout=self.poll_timeout)
        if message is not None:
            self.last_message = message
        return message

    def get_partitions(self):
        """Return assigned partitions, polling once first to trigger assignment."""
        partitions = self.consumer.assignment()
        if not partitions:
            # Partition assignment happens lazily after subscribe; one poll
            # gives the client a chance to join the group.
            self.poll()
            partitions = self.consumer.assignment()
        return partitions

    def get_current_offsets(self):
        return self.consumer.position(self.get_partitions())

    def reset_consumer_offsets(self, offset):
        """Re-assign all currently held partitions starting at ``offset``."""
        self.consumer.assign([TopicPartition(tp.topic, tp.partition, offset)
                              for tp in self.get_partitions()])

    def seek_to_timestamp(self, timestamp):
        """Move every assigned partition to the first offset at/after ``timestamp``."""
        timestamp_ms = dt_to_unix_ms(timestamp)
        partitions = self.get_partitions()
        # offsets_for_times expects the target timestamp in the offset field.
        for tp in partitions:
            tp.offset = timestamp_ms
        partitions = self.consumer.offsets_for_times(partitions)
        self.consumer.assign(partitions)

    def handle(self):
        """Process one poll cycle: dispatch a message, an EOF event, or wait."""
        message = self.poll()
        if message is None:
            self.wait()
        elif message.error():
            if message.error().code() == KafkaError._PARTITION_EOF:
                # Reached the current end of a partition — not a failure.
                self.partition_eof(message)
            else:
                raise KafkaException(message.error())
        else:
            self._consume(message)
            if self.commit_on_complete:
                self.commit()
        self.done()

    def commit(self):
        # Explicit commit is only meaningful when the caller has not
        # re-enabled auto-commit via ``consumer_settings``.
        if not self.consumer_settings.get('enable.auto.commit'):
            self.consumer.commit(asynchronous=self.async_commit)

    def _consume(self, message):
        # Wrap the raw Kafka message before handing it to subclass code.
        self.consume_message(MessageValue(message))

    def consume_message(self, message):
        # Hook for subclasses.
        pass

    def partition_eof(self, message):
        # Hook for subclasses.
        pass
def batch_filtering(cityfilter='ALL', mentionfilter='ALL', tagfilter='ALL'):
    """Fetch the most recent tweets from Kafka and filter them.

    Reads up to ``WINDOW_LEN`` messages ending at the topic's high
    watermark, keeps those matching the city/mention/tag filters
    ('ALL' disables a filter), de-duplicates, sorts by tweet timestamp
    and returns ``{"results": [display_strings]}``.

    Requires a ``username`` cookie; returns an error message otherwise.
    """
    if 'username' not in request.cookies:
        # Typo fixed: "your are" -> "you are".
        return {"results": ['Oooops, you are not logged in...']}

    username = request.cookies['username']
    print(f"Ok, {username}, let's fetch the latest tweets!")
    c = AvroConsumer({
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': username,
        'schema.registry.url': SCHEMA_REGISTRY_URL,
        # 'isolation.level': 'read_committed'
    })
    c.assign([TopicPartition(TOPIC, 0, 0)])
    low_offset, high_offset = c.get_watermark_offsets(TopicPartition(TOPIC, 0))

    # Rewind to the start of the window: the last WINDOW_LEN messages,
    # clamped to the earliest offset still available in the partition.
    if high_offset - WINDOW_LEN > 0:
        new_offset = high_offset - WINDOW_LEN
    else:
        new_offset = low_offset
    c.seek(TopicPartition(TOPIC, 0, new_offset))

    msgs = []   # (display_message, tweet_timestamp) tuples to return
    msg = None  # BUGFIX: pre-bind so the SerializerError handler cannot
                # raise UnboundLocalError when the very first poll fails
    pos = c.position([TopicPartition(TOPIC, 0, new_offset)])
    while pos[0].offset < high_offset:
        try:
            msg = c.poll(0)
        except SerializerError as e:
            print("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            continue
        if msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            continue

        # Pull the tweet fields out of the Avro payload.
        author = msg.value()['author']
        content = msg.value()['content']
        timestamp = datetime.datetime.fromtimestamp(
            float(msg.value()['timestamp'])).strftime('%H:%M:%S, %d-%m-%Y')
        message_ts = float(msg.value()['timestamp'])
        location = msg.value()['location']
        tags = [h[1:] for h in content.split() if h.startswith('#')]
        mentions = [h[1:] for h in content.split() if h.startswith('@')]
        display_message = f"[{author}] {content} ({location} - {timestamp})"
        print(f"[{author}] {content} ({location} - {timestamp})")
        pos = c.position([TopicPartition(TOPIC, 0, new_offset)])

        # A filter set to 'ALL' matches everything; otherwise compare it
        # against the message fields.  This conjunction is equivalent to the
        # original 8-branch if/elif chain (note: only mention/tag filters
        # were lowercased in the original — preserved as-is).
        if ((cityfilter == 'ALL' or location.lower() == cityfilter)
                and (mentionfilter == 'ALL' or mentionfilter.lower() in mentions)
                and (tagfilter == 'ALL' or tagfilter.lower() in tags)):
            msgs.append((display_message, message_ts))

    c.close()
    # De-duplicate (a message must appear once in the timeline), order by
    # tweet timestamp, and keep only the display strings.
    msgs = sorted(set(msgs), key=lambda x: x[1])
    msgs = [m[0] for m in msgs]
    print(msgs)
    return {"results": msgs}
class KafkaWorker(BaseWorker):
    """Worker that consumes Avro-encoded messages from a single Kafka topic.

    Lifecycle (driven by ``BaseWorker``): ``setup`` builds the consumer and
    subscribes, ``handle`` is invoked repeatedly to poll and dispatch one
    message, ``teardown`` closes the consumer.  Subclasses must set
    ``topic_name`` and typically override ``consume_message``.
    """

    # Topic to subscribe to; ``get_topic_name`` reports a config error when unset.
    topic_name = None
    # Consumer group id; a random one is generated when unset.
    consumer_name = None
    # Subclass overrides merged over the defaults in ``get_consumer_settings``.
    # NOTE(review): class-level mutable dict — shared by all instances; it is
    # only read here, never mutated.
    consumer_settings = {}
    # When True, ``handle`` commits offsets after each consumed message.
    commit_on_complete = False
    # Use non-blocking (asynchronous) offset commits.
    async_commit = True
    # Timeout (seconds) for each ``Consumer.poll``; 0 returns immediately.
    poll_timeout = 0
    # Start position when the group has no committed offset.
    auto_offset_reset = 'earliest'
    consumer = None      # AvroConsumer instance, created in ``setup``
    last_message = None  # most recent non-None message returned by ``poll``

    def setup(self):
        # Build the consumer and subscribe to the configured topic.
        self.consumer = AvroConsumer(self.get_consumer_settings())
        self.consumer.subscribe([self.get_topic_name()])

    def teardown(self):
        # Close cleanly so the group coordinator can rebalance promptly.
        if self.consumer:
            self.consumer.close()

    def get_topic_name(self):
        # Surface a "missing config" error when no topic is configured.
        return self.topic_name or utils.config_missing('topic name')

    def get_consumer_name(self):
        return self.consumer_name or utils.generate_random_consumer_name()

    def get_consumer_settings(self):
        """Return client settings: library defaults overlaid with ``consumer_settings``."""
        default_settings = {
            'group.id': self.get_consumer_name(),
            'default.topic.config': {'auto.offset.reset': self.auto_offset_reset},
            # Offsets are committed explicitly via ``commit`` below.
            'enable.auto.commit': False,
            'bootstrap.servers': utils.get_broker_url(),
            'schema.registry.url': utils.get_schema_registry_url(),
            'session.timeout.ms': 10000,
            'heartbeat.interval.ms': 1000,
            'api.version.request': True,
        }
        return utils.generate_client_settings(default_settings, self.consumer_settings)

    def poll(self):
        """Poll once; remember and return the message (None on timeout)."""
        message = self.consumer.poll(timeout=self.poll_timeout)
        if message is not None:
            self.last_message = message
        return message

    def get_partitions(self):
        """Return assigned partitions, polling once first to trigger assignment."""
        partitions = self.consumer.assignment()
        if not partitions:
            # Partition assignment happens lazily after subscribe; one poll
            # gives the client a chance to join the group.
            self.poll()
            partitions = self.consumer.assignment()
        return partitions

    def get_current_offsets(self):
        return self.consumer.position(self.get_partitions())

    def reset_consumer_offsets(self, offset):
        """Re-assign all currently held partitions starting at ``offset``."""
        self.consumer.assign([TopicPartition(tp.topic, tp.partition, offset)
                              for tp in self.get_partitions()])

    def seek_to_timestamp(self, timestamp):
        """Move every assigned partition to the first offset at/after ``timestamp``."""
        timestamp_ms = dt_to_unix_ms(timestamp)
        partitions = self.get_partitions()
        # offsets_for_times expects the target timestamp in the offset field.
        for tp in partitions:
            tp.offset = timestamp_ms
        partitions = self.consumer.offsets_for_times(partitions)
        self.consumer.assign(partitions)

    def handle(self):
        """Process one poll cycle: dispatch a message, an EOF event, or wait."""
        message = self.poll()
        if message is None:
            self.wait()
        elif message.error():
            if message.error().code() == KafkaError._PARTITION_EOF:
                # Reached the current end of a partition — not a failure.
                self.partition_eof(message)
            else:
                raise KafkaException(message.error())
        else:
            self._consume(message)
            if self.commit_on_complete:
                self.commit()
        self.done()

    def commit(self):
        # Explicit commit is only meaningful when auto-commit stays disabled.
        if not self.consumer_settings.get('enable.auto.commit'):
            # BUGFIX: was ``commit(async=...)`` — ``async`` is a reserved
            # keyword since Python 3.7 (SyntaxError), and confluent-kafka
            # renamed the parameter to ``asynchronous``.  Now consistent with
            # the other copy of this class in the codebase.
            self.consumer.commit(asynchronous=self.async_commit)

    def _consume(self, message):
        # Wrap the raw Kafka message before handing it to subclass code.
        self.consume_message(MessageValue(message))

    def consume_message(self, message):
        # Hook for subclasses.
        pass

    def partition_eof(self, message):
        # Hook for subclasses.
        pass
def streaming_filtering():
    """Stream newly-arriving tweets that match form-supplied filters.

    Reads ``cityfilter``/``mentionfilter``/``tagfilter`` from the POSTed
    form ('ALL' disables a filter), seeks the consumer to the topic's high
    watermark and returns a streaming ``Response`` whose generator yields,
    on every poll cycle, the backtick-delimited JSON list of matching
    messages seen in the last ``STREAMING_WINDOW_SECONDS``.

    Requires a ``username`` cookie; returns an error message otherwise.
    """
    cityfilter = request.form['cityfilter']
    mentionfilter = request.form['mentionfilter']
    tagfilter = request.form['tagfilter']
    print(f'cityfilter: {cityfilter}')
    print(f'mentionfilter: {mentionfilter}')
    print(f'tagfilter: {tagfilter}')

    if 'username' not in request.cookies:
        # Typo fixed: "your are" -> "you are".
        return {"results": ['Oooops, you are not logged in...']}

    username = request.cookies['username']
    print(f"Ok, {username}, let's stream the latest tweets!")
    c = AvroConsumer({
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': username,
        'schema.registry.url': SCHEMA_REGISTRY_URL
    })
    c.assign([TopicPartition(TOPIC, 0, 0)])
    low_offset, high_offset = c.get_watermark_offsets(TopicPartition(TOPIC, 0))
    print(f"the latest offset is {high_offset}, the low is {low_offset}")
    print(f"consumer position: {c.position([TopicPartition(TOPIC, 0)])}")

    # Start streaming from the current end of the topic: only tweets that
    # arrive after this request are shown.
    c.seek(TopicPartition(TOPIC, 0, high_offset))
    msgs = []  # (display_message, tweet_timestamp) tuples in the window
    pos = c.position([TopicPartition(TOPIC, 0, high_offset)])

    def _prune(window):
        # Keep only messages younger than the streaming window.
        current_ts = time.time()
        return [m for m in window
                if (float(current_ts) - float(m[1])) < STREAMING_WINDOW_SECONDS]

    def gen(msgs):  # generator function for streaming
        print('ciao')
        msg = None  # BUGFIX: pre-bind so the SerializerError handler cannot
                    # raise UnboundLocalError when the very first poll fails
        try:
            while True:
                try:
                    msg = c.poll(1)
                except SerializerError as e:
                    print("Message deserialization failed for {}: {}".format(msg, e))
                    break

                if msg is None:
                    # No new message: re-emit the pruned window so the
                    # client-side timeline keeps aging out old tweets.
                    msgs = _prune(msgs)
                    ret_msgs = [m[0] for m in msgs]
                    yield f' `{json.dumps(ret_msgs)}` '
                    continue
                if msg.error():
                    # Same re-emit on consumer errors, then log and move on.
                    msgs = _prune(msgs)
                    ret_msgs = [m[0] for m in msgs]
                    yield f' `{json.dumps(ret_msgs)}` '
                    print("AvroConsumer error: {}".format(msg.error()))
                    continue

                # Pull the tweet fields out of the Avro payload.
                author = msg.value()['author']
                content = msg.value()['content']
                timestamp = datetime.datetime.fromtimestamp(
                    float(msg.value()['timestamp'])).strftime('%H:%M:%S, %d-%m-%Y')
                location = msg.value()['location']
                tags = [h[1:] for h in content.split() if h.startswith('#')]
                mentions = [h[1:] for h in content.split() if h.startswith('@')]
                display_message = f"[{author}] {content} ({location} - {timestamp})"
                # Backticks delimit messages in the stream; strip them from
                # the content so the client can split on them.
                display_message = display_message.replace("`", "'")
                message_ts = float(msg.value()['timestamp'])
                print(f"{display_message}")
                print(
                    f"consumer position: {c.position([TopicPartition(TOPIC, 0, high_offset)])}"
                )
                pos = c.position([TopicPartition(TOPIC, 0, high_offset)])
                print('prima')
                print(f'cityfilter: {cityfilter}')
                print(f'mentionfilter: {mentionfilter}')
                print(f'tagfilter: {tagfilter}')

                # A filter set to 'ALL' matches everything; this conjunction
                # is equivalent to the original 8-branch if/elif chain (only
                # mention/tag filters were lowercased — preserved as-is).
                if ((cityfilter == 'ALL' or location.lower() == cityfilter)
                        and (mentionfilter == 'ALL' or mentionfilter.lower() in mentions)
                        and (tagfilter == 'ALL' or tagfilter.lower() in tags)):
                    msgs.append((display_message, message_ts))

                # Age out old messages, order by tweet timestamp, emit.
                msgs = sorted(_prune(msgs), key=lambda x: x[1])
                ret_msgs = [m[0] for m in msgs]
                yield f' `{json.dumps(ret_msgs)}` '
        finally:
            # BUGFIX (resource leak): close the consumer when the stream
            # ends or the client disconnects (GeneratorExit lands here).
            c.close()

    return Response(stream_with_context(gen(msgs)))