Example #1
    def with_partitions(self, partitions_to_add):
        """Returns a copy of cluster metadata with partitions added"""
        new_metadata = ClusterMetadata(**self.config)
        new_metadata._brokers = copy.deepcopy(self._brokers)
        new_metadata._partitions = copy.deepcopy(self._partitions)
        new_metadata._broker_partitions = copy.deepcopy(self._broker_partitions)
        new_metadata._groups = copy.deepcopy(self._groups)
        new_metadata.internal_topics = copy.deepcopy(self.internal_topics)
        new_metadata.unauthorized_topics = copy.deepcopy(self.unauthorized_topics)

        for partition in partitions_to_add:
            new_metadata._partitions[partition.topic][partition.partition] = partition

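            # a leader of -1 (or None) means the partition currently has no leader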
            if partition.leader is not None and partition.leader != -1:
                new_metadata._broker_partitions[partition.leader].add(
                    TopicPartition(partition.topic, partition.partition))

        return new_metadata
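
A minimal usage sketch for the method above, assuming kafka-python's ClusterMetadata and its PartitionMetadata namedtuple (fields: topic, partition, leader, replicas, isr, error); the topic name and broker ids are placeholders:

    from kafka.cluster import ClusterMetadata
    from kafka.structs import PartitionMetadata

    cluster = ClusterMetadata(bootstrap_servers='localhost:9092')
    # hypothetical new partition of my_topic, led by broker 0
    new_partition = PartitionMetadata('my_topic', 3, 0, [0, 1], [0, 1], 0)
    expanded = cluster.with_partitions([new_partition])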
Example #2
def test__unpack_message_set(fetcher):
    fetcher.config['check_crcs'] = False
    tp = TopicPartition('foo', 0)
    messages = [
        (None, b"a", None),
        (None, b"b", None),
        (None, b"c", None),
    ]
    memory_records = MemoryRecords(_build_record_batch(messages))
    records = list(fetcher._unpack_message_set(tp, memory_records))
    assert len(records) == 3
    assert all(map(lambda x: isinstance(x, ConsumerRecord), records))
    assert records[0].value == b'a'
    assert records[1].value == b'b'
    assert records[2].value == b'c'
    assert records[0].offset == 0
    assert records[1].offset == 1
    assert records[2].offset == 2
Example #3
 async def set_assignments(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Updating assignments for %s to %r", internal_name, request_data)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     partitions = []
     for el in request_data["partitions"]:
         convert_to_int(el, "partition", content_type)
         self._has_topic_and_partition_keys(el, content_type)
         partitions.append(TopicPartition(el["topic"], el["partition"]))
     async with self.consumer_locks[internal_name]:
         try:
             consumer = self.consumers[internal_name].consumer
             consumer.assign(partitions)
             self._update_partition_assignments(consumer)
             empty_response()
         except IllegalStateError as e:
             self._illegal_state_fail(message=str(e), content_type=content_type)
         finally:
             self.log.info("Done updating assignment")
Example #4
def consumer_partitions_for_topic(consumer, topic):
    """Returns a list of all TopicPartitions for a given topic.

    Arguments:
        consumer: an initialized KafkaConsumer
        topic: a topic name to fetch TopicPartitions for

    :returns:
        list(TopicPartition): A list of TopicPartitions that belong to the given topic
    """
    topic_partitions = []
    partitions = consumer.partitions_for_topic(topic)
    if partitions is not None:
        for partition in partitions:
            topic_partitions.append(TopicPartition(topic, partition))
    else:
        logging.error(
            "No partitions found for topic {}. Maybe it doesn't exist?".format(
                topic), )
    return topic_partitions
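
A short usage sketch for the helper above (the bootstrap address and topic name are placeholders):

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    for tp in consumer_partitions_for_topic(consumer, 'my_topic'):
        print(tp.topic, tp.partition)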
Example #5
 def prepare(self):
     topic = self._task.topic
     partitions = self._task.partitions
     self.consumer.unsubscribe()
     print('topics before assign:', self.consumer.topics())
     if partitions:
         partitions = [
             TopicPartition(topic, int(part))
             for part in partitions.split(',')
         ]
         assert len(
             partitions
         ) == 1, 'current task should be assigned only one partition'
         self.consumer.assign(partitions)
         # self.consumer.seek_to_beginning(*partitions)
         # self.consumer.seek(TopicPartition(topic, 0), 0)
     else:
         self.consumer.subscribe([topic])
     print('consumer topics', self._task, id(self.consumer),
           self.consumer.topics())
Example #6
def get_last_message() -> Dict:
    """
    Returns the last message of the queue in the specified topic
    :return: Dict of the message if there is one, else an empty dict
    """
    consumer = KafkaConsumer(bootstrap_servers=args.host,
                             value_deserializer=lambda m: json.loads(m.decode('ascii')),
                             enable_auto_commit=False,
                             auto_offset_reset='earliest')
    topic_partition = TopicPartition(topic=args.topic, partition=0)
    consumer.assign([topic_partition])
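    # after seek_to_end(), position() returns the log-end offset,
    # i.e. the offset the next produced message would get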
    consumer.seek_to_end(topic_partition)
    last_offset = consumer.position(topic_partition)
    if last_offset != 0:
        consumer.seek_to_beginning(topic_partition)
        for msg in consumer:
            if msg.offset == last_offset - 1:
                break
        return msg.value
    return {}
Example #7
 async def seek_to(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Resetting offsets for %s to %r", internal_name, request_data)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "offsets", content_type)
     seeks = []
     for el in request_data["offsets"]:
         self._assert_has_key(el, "topic", content_type)
         for k in ["offset", "partition"]:
             self._assert_has_key(el, k, content_type)
             convert_to_int(el, k, content_type)
         self._assert_positive_number(el, "offset", content_type)
         seeks.append((TopicPartition(topic=el["topic"], partition=el["partition"]), el["offset"]))
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for part, offset in seeks:
             try:
                 consumer.seek(part, offset)
             except AssertionError:
                 self._illegal_state_fail(f"Partition {part} is unassigned", content_type)
         empty_response()
Example #8
def main():
    while True:
        try:
            consumer = KafkaConsumer(
                'test',
                bootstrap_servers=['kafka:9092'],
                auto_offset_reset='earliest',
                group_id='1',
                value_deserializer=lambda x: json.loads(x.decode('utf-8')))
            break
        except Exception:
            print('connection error')
            time.sleep(3)

    for message in consumer:
        print(message.value)

        tp = TopicPartition(message.topic, message.partition)
        # commit offset + 1: the committed offset marks the next message to consume
        offsets = {tp: OffsetAndMetadata(message.offset + 1, '')}
        consumer.commit(offsets=offsets)
Example #9
 async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Retrieving offsets for %s", internal_name)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     response = {"offsets": []}
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for el in request_data["partitions"]:
             convert_to_int(el, "partition", content_type)
             tp = TopicPartition(el["topic"], el["partition"])
             commit_info = consumer.committed(tp, metadata=True)
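             # committed() returns None for partitions with no committed offset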
             if not commit_info:
                 continue
             response["offsets"].append({
                 "topic": tp.topic,
                 "partition": tp.partition,
                 "metadata": commit_info.metadata,
                 "offset": commit_info.offset
             })
     KarapaceBase.r(body=response, content_type=content_type)
Example #10
def test__handle_offset_response(fetcher, mocker):
    # Broker returns UnsupportedForMessageFormatError, will omit partition
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 43, -1, -1)]),
        ("topic", [(1, 0, 1000, 9999)])
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.succeeded()
    assert fut.value == {TopicPartition("topic", 1): (9999, 1000)}

    # Broker returns NotLeaderForPartitionError
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 6, -1, -1)]),
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.failed()
    assert isinstance(fut.exception, NotLeaderForPartitionError)

    # Broker returns UnknownTopicOrPartitionError
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 3, -1, -1)]),
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.failed()
    assert isinstance(fut.exception, UnknownTopicOrPartitionError)

    # Broker returns many errors and 1 result
    # Will fail on 1st error and return
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 43, -1, -1)]),
        ("topic", [(1, 6, -1, -1)]),
        ("topic", [(2, 3, -1, -1)]),
        ("topic", [(3, 0, 1000, 9999)])
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.failed()
    assert isinstance(fut.exception, NotLeaderForPartitionError)
Example #11
    def bash_consumer(self):
        self.consumer.assign([TopicPartition(topic=topic, partition=0)])

        for msg in self.consumer:
            msg_offset = msg.offset
            msg_value = (msg.value).decode("utf-8")
            msg_value = json.loads(msg_value)

            self.save_list.append(msg_value)

            save_data_size = sys.getsizeof(self.save_list)

            if save_data_size >= single_file_size * 1024 * 1024:
                save_file = self.log_file_path + "bash_history_log_" + str(
                    self.count)

                self.count += 1
                with open(save_file, "w+") as f:
                    for line in self.save_list:
                        # entries are parsed dicts, so serialize back to JSON
                        f.write(json.dumps(line) + "\n")

                # only clear the buffer after it has been written out
                self.save_list = []
                print("*" * 10, "writing file", "*" * 10)

            if save_to_tb:
                if self.tb_empty:
                    add_way = "replace"
                    self.tb_empty = False
                else:
                    add_way = "append"

                for k, v in msg_value.items():
                    msg_value[k] = [v]
                insert_df = pd.DataFrame.from_dict(msg_value)

                insert_df.to_sql(dbh_config["table"],
                                 self.dbh,
                                 if_exists=add_way,
                                 index=False)

            print("当前size", save_data_size, "\t", msg_value)
Example #12
    def run(self):
        consumer = KafkaConsumer(
            bootstrap_servers=f'{self.remote_ip}:{self.remote_port}',
            api_version=settings.KAFKA_API_VERSION,
            auto_offset_reset='earliest',
            group_id="tweet_consumer")
        consumer.subscribe([settings.KAFKA_TOPIC_TWEETS])

        try:
            while True:
                for message in consumer:
                    decoded_message = str(message.value.decode('utf-8'))
                    tractor.prepare_tweet_and_push_to_elastic(decoded_message)

                    tp = TopicPartition(message.topic, message.partition)
                    # commit offset + 1 so a restart resumes after this message
                    oem = OffsetAndMetadata(message.offset + 1, '')
                    consumer.commit({tp: oem})
        finally:
            consumer.close()
Example #13
    def open(self):
        super().initialize()

        self._consumer = KafkaConsumer(
            bootstrap_servers=self._bootstrap_servers,
            consumer_timeout_ms=self._timeout)

        # point to what we want at
        partition = TopicPartition(self._topic, self._partition)
        self._consumer.assign([partition])

        offset = None
        if self._begin_time:
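            # offsets_for_times() takes timestamps in epoch milliseconds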
            timestamp = self._begin_time * 1000
            offinfo = self._consumer.offsets_for_times({partition: timestamp})
            if offinfo is None or offinfo[partition] is None:
                raise ValueError(
                    "There is no data in the threat feed stream after the"
                    " given begin date")
            offset = offinfo[partition].offset
            self._consumer.seek(partition, offset)
Example #14
    def test_load_metadata(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_1', [
                (NO_ERROR, 0, 1, [1, 2], [1, 2])
            ]),
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
            (NO_LEADER, 'topic_no_partitions', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
            (NO_ERROR, 'topic_3', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1]),
                (NO_ERROR, 1, 1, [1, 0], [1, 0]),
                (NO_ERROR, 2, 0, [0, 1], [0, 1])
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        # client loads metadata at init
        client = SimpleClient(hosts=['broker_1:4567'])
        self.assertDictEqual({
            TopicPartition('topic_1', 0): brokers[1],
            TopicPartition('topic_noleader', 0): None,
            TopicPartition('topic_noleader', 1): None,
            TopicPartition('topic_3', 0): brokers[0],
            TopicPartition('topic_3', 1): brokers[1],
            TopicPartition('topic_3', 2): brokers[0]},
            client.topics_to_brokers)

        # if we ask for metadata explicitly, it should raise errors
        with self.assertRaises(LeaderNotAvailableError):
            client.load_metadata_for_topics('topic_no_partitions')

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.load_metadata_for_topics('topic_unknown')

        # This should not raise
        client.load_metadata_for_topics('topic_noleader')
Example #15
def test_partition_records_offset():
    """Test that compressed messagesets are handled correctly
    when the fetch offset is in the middle of the message list
    """
    batch_start = 120
    batch_end = 130
    fetch_offset = 123
    tp = TopicPartition('foo', 0)
    messages = [ConsumerRecord(tp.topic, tp.partition, i,
                               None, None, 'key', 'value', [], 'checksum', 0, 0, -1)
                for i in range(batch_start, batch_end)]
    records = Fetcher.PartitionRecords(fetch_offset, None, messages)
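    # records 120..122, which precede fetch_offset, should be skipped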
    assert len(records) > 0
    msgs = records.take(1)
    assert msgs[0].offset == fetch_offset
    assert records.fetch_offset == fetch_offset + 1
    msgs = records.take(2)
    assert len(msgs) == 2
    assert len(records) > 0
    records.discard()
    assert len(records) == 0
Example #16
    def test_with_limited_retries(self):

        # lets create a queue and add 10 messages for 10 different partitions
        # to show how retries should work ideally
        for i in range(10):
            self.queue.put((TopicPartition("test", i), "msg %i" % i, "key %i" % i))

        def send_side_effect(reqs, *args, **kwargs):
            return [FailedPayloadsError(req) for req in reqs]

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(3, 3)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 16 non-void calls:
        # 3 initial batches of 3 msgs each + 1 initial batch of 1 msg +
        # 3 retries of the batches above = (1 + 3 retries) * 4 batches = 16
        self.assertEqual(self.client.send_produce_request.call_count, 16)
Example #17
    def parse_member_metadata(cls, metadata):
        """
        Parses member metadata into a python object.
        This implementation only serializes and deserializes the StickyAssignorMemberMetadataV1 user data,
        since no StickyAssignor written in Python was ever deployed in the wild with version V0, meaning
        that there is no need to support backward compatibility with V0.

        Arguments:
          metadata (MemberMetadata): decoded metadata for a member of the group.

        Returns:
          parsed metadata (StickyAssignorMemberMetadataV1)
        """
        user_data = metadata.user_data
        if not user_data:
            return StickyAssignorMemberMetadataV1(
                partitions=[],
                generation=cls.DEFAULT_GENERATION_ID,
                subscription=metadata.subscription)

        try:
            decoded_user_data = StickyAssignorUserDataV1.decode(user_data)
        except Exception as e:
            # ignore the consumer's previous assignment if it cannot be parsed
            log.error("Could not parse member data: %s", e)
            return StickyAssignorMemberMetadataV1(
                partitions=[],
                generation=cls.DEFAULT_GENERATION_ID,
                subscription=metadata.subscription)

        member_partitions = []
        for topic, partitions in decoded_user_data.previous_assignment:  # pylint: disable=no-member
            member_partitions.extend(
                [TopicPartition(topic, partition) for partition in partitions])
        return StickyAssignorMemberMetadataV1(
            # pylint: disable=no-member
            partitions=member_partitions,
            generation=decoded_user_data.generation,
            subscription=metadata.subscription)
Example #18
    async def wait_no_lag(self):
        print("Ensuring no lag")
        consumer_group = "f-simple"
        client = self.client
        await self.client.bootstrap()
        source = self.source
        source_builder = self._source_builder

        source_highwaters = source_builder._highwaters()
        source_tps = source_builder._assignment
        protocol_tps = [(source, [tp.partition for tp in source_tps])]

        node_id = next(broker.nodeId for broker in client.cluster.brokers())
        coordinator_request = GroupCoordinatorRequest_v0(consumer_group)
        coordinator_response = await client.send(node_id, coordinator_request)
        coordinator_id = coordinator_response.coordinator_id

        while True:
            consumer_offsets_req = OffsetFetchRequest_v1(
                consumer_group, protocol_tps)
            consumer_offsets_resp = await client.send(coordinator_id,
                                                      consumer_offsets_req)
            topics = consumer_offsets_resp.topics
            assert len(topics) == 1, f"{topics!r}"
            topic, partition_resps = topics[0]
            assert topic == source, f"{source}"
            assert len(partition_resps) == len(source_tps)

            # + 1 is to account for the difference in how faust commits
            positions = {
                TopicPartition(topic=source, partition=partition): offset + 1
                for partition, offset, _, _ in partition_resps
            }

            if positions != source_highwaters:
                print("There is lag. Waiting!")
                await asyncio.sleep(2.0)
            else:
                return
Example #19
def topic_offsets(kafka_brokers, topic):
    client = SimpleClient(insure_is_array(kafka_brokers))
    topic_partitions = client.topic_partitions
    if topic not in topic_partitions:
        raise KafkaException("topic {} doesn't exist".format(topic))
    partitions = topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]
    offsets_responses = client.send_offset_request(offset_requests)
    client.close()
    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic == topic:
            topic_offset = 0
            topic_partition = TopicPartition(topic=offset.topic,
                                             partition=offset.partition)
            if offset.offsets[0]:
                topic_offset = offset.offsets[0]
            partitions_and_offsets[topic_partition] = topic_offset

    return partitions_and_offsets
Example #20
    def _get_consumer(self):
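        # pop the defaults out of consumer_params so the remaining params can
        # be passed through **self.consumer_params without duplicate keywords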
        enable_auto_commit = self.consumer_params.pop("enable_auto_commit",
                                                      False)
        auto_offset_reset = self.consumer_params.pop("auto_offset_reset",
                                                     "earliest")
        consumer_timeout_ms = self.consumer_params.pop("consumer_timeout_ms",
                                                       10 * 1000)
        max_poll_records = self.consumer_params.pop("max_poll_records", 1)
        value_deserializer = self.consumer_params.pop(
            "value_deserializer", lambda x: json.loads(x.decode("utf-8")))

        consumer = KafkaConsumer(group_id=self.group_id,
                                 bootstrap_servers=self.bootstrap_servers,
                                 consumer_timeout_ms=consumer_timeout_ms,
                                 enable_auto_commit=enable_auto_commit,
                                 auto_offset_reset=auto_offset_reset,
                                 max_poll_records=max_poll_records,
                                 value_deserializer=value_deserializer,
                                 **self.consumer_params)
        consumer.assign([TopicPartition(self.topic, self.partition)])

        return consumer
Example #21
def test_fetch_committed_offsets(mocker, coordinator):

    # No partitions, no IO polling
    mocker.patch.object(coordinator._client, 'poll')
    assert coordinator.fetch_committed_offsets([]) == {}
    assert coordinator._client.poll.call_count == 0

    # general case -- send offset fetch request, get successful future
    mocker.patch.object(coordinator, 'ensure_coordinator_known')
    mocker.patch.object(coordinator,
                        '_send_offset_fetch_request',
                        return_value=Future().success('foobar'))
    partitions = [TopicPartition('foobar', 0)]
    ret = coordinator.fetch_committed_offsets(partitions)
    assert ret == 'foobar'
    coordinator._send_offset_fetch_request.assert_called_with(partitions)
    assert coordinator._client.poll.call_count == 1

    # Failed future is raised if not retriable
    coordinator._send_offset_fetch_request.return_value = Future().failure(
        AssertionError)
    coordinator._client.poll.reset_mock()
    try:
        coordinator.fetch_committed_offsets(partitions)
    except AssertionError:
        pass
    else:
        assert False, 'Exception not raised when expected'
    assert coordinator._client.poll.call_count == 1

    coordinator._client.poll.reset_mock()
    coordinator._send_offset_fetch_request.side_effect = [
        Future().failure(Errors.RequestTimedOutError),
        Future().success('fizzbuzz')
    ]

    ret = coordinator.fetch_committed_offsets(partitions)
    assert ret == 'fizzbuzz'
    assert coordinator._client.poll.call_count == 2  # call + retry
Example #22
    def test_async_producer_not_leader(self):

        for i in range(10):
            self.queue.put((TopicPartition("test", i), "msg %i", "key %i"))

        # Mock offsets counter for closure
        offsets = collections.defaultdict(
            lambda: collections.defaultdict(lambda: 0))
        self.client.is_first_time = True

        def send_side_effect(reqs, *args, **kwargs):
            if self.client.is_first_time:
                self.client.is_first_time = False
                return [
                    ProduceResponsePayload(req.topic, req.partition,
                                           NotLeaderForPartitionError.errno,
                                           -1) for req in reqs
                ]

            responses = []
            for req in reqs:
                offset = offsets[req.topic][req.partition]
                offsets[req.topic][req.partition] += len(req.messages)
                responses.append(
                    ProduceResponsePayload(req.topic, req.partition, 0,
                                           offset))
            return responses

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(2)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 5 non-void calls: 1st failed batch of 3 msgs
        # + 3 batches of 3 msgs each + 1 batch of 1 msg = 1 + 3 + 1 = 5
        self.assertEqual(self.client.send_produce_request.call_count, 5)
Example #23
 def setupTable(self, manku_motwani, topic_name, minutes=1440):
     self.consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
     self.tp = TopicPartition(topic_name, 0)
     self.cur_offset = self.consumer.end_offsets([self.tp])
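     # end_offsets() returns a dict mapping TopicPartition -> log-end offset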
     current_time = datetime.datetime.now()
     old_time = current_time - datetime.timedelta(minutes=minutes)
     old_epoch_ts = int(old_time.timestamp() * 1000)  # in milliseconds
     self.old_offsets = self.consumer.offsets_for_times(
         {self.tp: old_epoch_ts})
     self.consumer.assign([self.tp])
     self.consumer.seek(self.tp, int(self.old_offsets[self.tp].offset))
     print("StartOffset: ", int(self.old_offsets[self.tp].offset),
           " EndOffset: ", int(self.cur_offset[self.tp]))
     number_of_msg_in_stream = int(self.cur_offset[self.tp]) - int(
         self.old_offsets[self.tp].offset)
     print("Count of Messages: ", number_of_msg_in_stream)
     for message in self.consumer:
         if int(message.offset) >= int(self.cur_offset[self.tp]):
             break
         r_msg = str(message.value.decode("utf-8"))
         tweet_text = json.loads(r_msg)
         manku_motwani.add(tweet_text)
     return None
Example #24
    def __init__(self, topics, bootstrap_servers, serviceMongodb):
        self.topics = topics
        self.bootstrap_servers = bootstrap_servers
        self.serviceMongodb = serviceMongodb
        # establish the connection
        try:
            self.kc = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
                                    group_id="group_" + str(topics))
            tp = TopicPartition(topics, 0)
            self.kc.assign([tp])

            # restore the position we stopped at last time
            lastPos = serviceMongodb.getDataByTopics("test")
            if lastPos is None:
                offset = 0
                serviceMongodb.saveData(topics=topics, offset=0)
            else:
                offset = lastPos["offset"] + 1

            self.kc.seek(partition=tp, offset=offset)
            app.logger.info("kafka server: " + bootstrap_servers + ": connected successfully" +
                            " topics:" + self.topics + " offset:" + str(offset))

        except Exception as e:
            app.logger.error("kafka server: " + bootstrap_servers + ": connection failed" +
                             " topics:" + self.topics + " " + str(e.args))
Example #25
def get_last_block() -> dict:
    """
    Fetch the last message from Kafka's block_topic
    :return:
    """
    topic = config.get('kafka', 'block_topic', fallback=None)
    if not topic:
        return {}
    logger.debug(f'Searching for data in {topic}')
    consumer = kafka_consumer(group_id='monitor_block')
    partitions = [
        TopicPartition(topic, p) for p in consumer.partitions_for_topic(topic)
    ]
    last_offset_per_partition = consumer.end_offsets(partitions)
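    # pick the highest-numbered partition and its end offset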
    max_partition, max_offset = sorted(last_offset_per_partition.items(),
                                       key=lambda x: x[0])[-1]
    if max_offset > 0:
        consumer.assign([max_partition])
        consumer.seek(max_partition, offset=max_offset - 1)
        msg = next(consumer)
        logger.debug(f'The latest message in {topic} is {msg.value}')
        return msg.value
    return {}
Example #26
 def __init__(self):
     # topic
     self.topic_name = 'auction_similary_test'
     self.tagname = 'auction_similary_tag_ids_test'
     self.dictionary_path = 'deerwester_test.dict'
     # mysql
     self.conn = pymysql.Connect(**self.MYSQL_ASSET_TEST)
     self.cursor = self.conn.cursor(pymysql.cursors.DictCursor)
     # redis
     self.redis_conn = redis.Redis(**self.ONLINE_REDIS_PARAM)
     self.redis_key = 'auction_similary:' + self.topic_name
     date_str = str(datetime.date.today())
     self.redis_tag_key = self.tagname + ':' + date_str
     # kafka
     self.consumer = KafkaConsumer(group_id='group_similary',
                                   bootstrap_servers=self.KAFKA_SOCK)
     self.partition = TopicPartition(topic=self.topic_name, partition=0)
     self.consumer.assign([self.partition])
     self.record_one_day = True
     self.SOURCE_ID_LIMIT = (1, 3, 4, 5, 6, 10857, 10873)
     # logging
     logging.warning('{} {}'.format(
         self.get_now_datetime('%Y-%m-%d %H:%M:%S'), 'Yuancheng similarity analysis v1.6'))
Example #27
def on_message(client, userdata, msg):
    global counter
    counter += 1
    print(f"Received message No: {counter}")

    parsed_msg = json.loads(msg.payload.decode('utf-8'))

    try:
        if parsed_msg['action'] == "RUN":
            kafka_consumer.run()

        if parsed_msg['action'] == "COMMIT":
            # parsed_msg comes from json.loads, so use dict indexing
            # rather than attribute access
            commit_offsets = parsed_msg['kafka_commit_offsets']
            tp = TopicPartition(
                commit_offsets['topic'],
                commit_offsets['message']['partition'],
            )
            oem = OffsetAndMetadata(commit_offsets['offset'],
                                    commit_offsets['metadata'])
            offsets = {tp: oem}  # commit() expects a dict, not the set {tp, oem}
            kafka_consumer.commit(offsets)
    except Exception as ex:
        print(ex)
        pass
Example #28
    def run(self):
        for message in self.consumer:
            print('%s:%d:%d: received' % (
                message.topic, message.partition, message.offset))
            try:
                print(message.value)
                self.__handle_action(message)
            except KeyboardInterrupt:
                print('Stopped')
            except UTimeoutError:
                print('timeout')
            except Exception as error:
                print(error)

            if not self.auto_commit:
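                # commit metadata must be an opaque string; committing
                # message.offset + 1 marks this message as processed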
                partition = TopicPartition(message.topic, message.partition)
                offset = OffsetAndMetadata(message.offset + 1, '')
                options = {partition: offset}
                self.consumer.commit(options)

            print('%s:%d:%d: committed\n' % (
                message.topic, message.partition, message.offset))
Example #29
    def test_kafka_consumer__blocking(self):
        TIMEOUT_MS = 500
        consumer = self.kafka_consumer(auto_offset_reset='earliest',
                                       enable_auto_commit=False,
                                       consumer_timeout_ms=TIMEOUT_MS)

        # Manual assignment avoids overhead of consumer group mgmt
        consumer.unsubscribe()
        consumer.assign([TopicPartition(self.topic, 0)])

        # Ask for a message; nothing in queue, so block 500ms
        with Timer() as t:
            with self.assertRaises(StopIteration):
                msg = next(consumer)
        self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0)

        self.send_messages(0, range(0, 10))

        # Ask for 5 messages, 10 in queue. Get 5 back, no blocking
        messages = set()
        with Timer() as t:
            for i in range(5):
                msg = next(consumer)
                messages.add((msg.partition, msg.offset))
        self.assertEqual(len(messages), 5)
        self.assertLess(t.interval, TIMEOUT_MS / 1000.0)

        # Ask for 10 messages, get 5 back, block 500ms
        messages = set()
        with Timer() as t:
            with self.assertRaises(StopIteration):
                for i in range(10):
                    msg = next(consumer)
                    messages.add((msg.partition, msg.offset))
        self.assertEqual(len(messages), 5)
        self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0)
        consumer.close()
Example #30
 def _handle_offset_fetch_response(self, future, response):
     offsets = {}
     for topic, partitions in response.topics:
         for partition, offset, metadata, error_code in partitions:
             tp = TopicPartition(topic, partition)
             error_type = Errors.for_code(error_code)
             if error_type is not Errors.NoError:
                 error = error_type()
                 log.debug(
                     "Group %s failed to fetch offset for partition"
                     " %s: %s", self.group_id, tp, error)
                 if error_type is Errors.GroupLoadInProgressError:
                     # just retry
                     future.failure(error)
                 elif error_type is Errors.NotCoordinatorForGroupError:
                     # re-discover the coordinator and retry
                     self.coordinator_dead(error_type())
                     future.failure(error)
                 elif error_type is Errors.UnknownTopicOrPartitionError:
                     log.warning(
                         "OffsetFetchRequest -- unknown topic %s"
                         " (have you committed any offsets yet?)", topic)
                     continue
                 else:
                     log.error("Unknown error fetching offsets for %s: %s",
                               tp, error)
                     future.failure(error)
                 return
             elif offset >= 0:
                 # record the position with the offset
                 # (-1 indicates no committed offset to fetch)
                 offsets[tp] = OffsetAndMetadata(offset, metadata)
             else:
                 log.debug(
                     "Group %s has no committed offset for partition"
                     " %s", self.group_id, tp)
     future.success(offsets)