Example #1
    def with_partitions(self, partitions_to_add):
        """Returns a copy of cluster metadata with partitions added"""
        new_metadata = ClusterMetadata(**self.config)
        new_metadata._brokers = copy.deepcopy(self._brokers)
        new_metadata._partitions = copy.deepcopy(self._partitions)
        new_metadata._broker_partitions = copy.deepcopy(self._broker_partitions)
        new_metadata._groups = copy.deepcopy(self._groups)
        new_metadata.internal_topics = copy.deepcopy(self.internal_topics)
        new_metadata.unauthorized_topics = copy.deepcopy(self.unauthorized_topics)

        for partition in partitions_to_add:
            new_metadata._partitions[partition.topic][partition.partition] = partition

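            # a leader of -1 (or None) means the partition currently has no leader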
            if partition.leader is not None and partition.leader != -1:
                new_metadata._broker_partitions[partition.leader].add(
                    TopicPartition(partition.topic, partition.partition))

        return new_metadata
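
A minimal usage sketch for the method above, assuming kafka-python's ClusterMetadata and its PartitionMetadata namedtuple (fields: topic, partition, leader, replicas, isr, error); the topic name and broker ids are placeholders:

    from kafka.cluster import ClusterMetadata
    from kafka.structs import PartitionMetadata

    cluster = ClusterMetadata(bootstrap_servers='localhost:9092')
    # hypothetical new partition of my_topic, led by broker 0
    new_partition = PartitionMetadata('my_topic', 3, 0, [0, 1], [0, 1], 0)
    expanded = cluster.with_partitions([new_partition])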
Example #2
def test__unpack_message_set(fetcher):
    fetcher.config['check_crcs'] = False
    tp = TopicPartition('foo', 0)
    messages = [
        (None, b"a", None),
        (None, b"b", None),
        (None, b"c", None),
    ]
    memory_records = MemoryRecords(_build_record_batch(messages))
    records = list(fetcher._unpack_message_set(tp, memory_records))
    assert len(records) == 3
    assert all(map(lambda x: isinstance(x, ConsumerRecord), records))
    assert records[0].value == b'a'
    assert records[1].value == b'b'
    assert records[2].value == b'c'
    assert records[0].offset == 0
    assert records[1].offset == 1
    assert records[2].offset == 2
Example #3
 async def set_assignments(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Updating assignments for %s to %r", internal_name, request_data)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     partitions = []
     for el in request_data["partitions"]:
         convert_to_int(el, "partition", content_type)
         self._has_topic_and_partition_keys(el, content_type)
         partitions.append(TopicPartition(el["topic"], el["partition"]))
     async with self.consumer_locks[internal_name]:
         try:
             consumer = self.consumers[internal_name].consumer
             consumer.assign(partitions)
             self._update_partition_assignments(consumer)
             empty_response()
         except IllegalStateError as e:
             self._illegal_state_fail(message=str(e), content_type=content_type)
         finally:
             self.log.info("Done updating assignment")
Example #4
def consumer_partitions_for_topic(consumer, topic):
    """Returns a list of all TopicPartitions for a given topic.

    Arguments:
        consumer: an initialized KafkaConsumer
        topic: a topic name to fetch TopicPartitions for

    :returns:
        list(TopicPartition): A list of TopicPartitions that belong to the given topic
    """
    topic_partitions = []
    partitions = consumer.partitions_for_topic(topic)
    if partitions is not None:
        for partition in partitions:
            topic_partitions.append(TopicPartition(topic, partition))
    else:
        logging.error(
            "No partitions found for topic {}. Maybe it doesn't exist?".format(
                topic), )
    return topic_partitions
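
A short usage sketch for the helper above (the bootstrap address and topic name are placeholders):

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    for tp in consumer_partitions_for_topic(consumer, 'my_topic'):
        print(tp.topic, tp.partition)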
Example #5
 def prepare(self):
     topic = self._task.topic
     partitions = self._task.partitions
     self.consumer.unsubscribe()
     print('topics before assign:', self.consumer.topics())
     if partitions:
         partitions = [
             TopicPartition(topic, int(part))
             for part in partitions.split(',')
         ]
         assert len(
             partitions
         ) == 1, 'current task should be assigned only one partition'
         self.consumer.assign(partitions)
         # self.consumer.seek_to_beginning(*partitions)
         # self.consumer.seek(TopicPartition(topic, 0), 0)
     else:
         self.consumer.subscribe([topic])
     print('consumer topics', self._task, id(self.consumer),
           self.consumer.topics())
Example #6
def get_last_message() -> Dict:
    """
    Returns the last message of the queue in the specified topic
    :return: Dict of the message if there is one, else an empty dict
    """
    consumer = KafkaConsumer(bootstrap_servers=args.host,
                             value_deserializer=lambda m: json.loads(m.decode('ascii')),
                             enable_auto_commit=False,
                             auto_offset_reset='earliest')
    topic_partition = TopicPartition(topic=args.topic, partition=0)
    consumer.assign([topic_partition])
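    # after seek_to_end(), position() returns the log-end offset,
    # i.e. the offset the next produced message would get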
    consumer.seek_to_end(topic_partition)
    last_offset = consumer.position(topic_partition)
    if last_offset != 0:
        consumer.seek_to_beginning(topic_partition)
        for msg in consumer:
            if msg.offset == last_offset - 1:
                break
        return msg.value
    return {}
Example #7
 async def seek_to(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Resetting offsets for %s to %r", internal_name, request_data)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "offsets", content_type)
     seeks = []
     for el in request_data["offsets"]:
         self._assert_has_key(el, "topic", content_type)
         for k in ["offset", "partition"]:
             self._assert_has_key(el, k, content_type)
             convert_to_int(el, k, content_type)
         self._assert_positive_number(el, "offset", content_type)
         seeks.append((TopicPartition(topic=el["topic"], partition=el["partition"]), el["offset"]))
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for part, offset in seeks:
             try:
                 consumer.seek(part, offset)
             except AssertionError:
                 self._illegal_state_fail(f"Partition {part} is unassigned", content_type)
         empty_response()
Example #8
def main():
    while True:
        try:
            consumer = KafkaConsumer(
                'test',
                bootstrap_servers=['kafka:9092'],
                auto_offset_reset='earliest',
                group_id='1',
                value_deserializer=lambda x: json.loads(x.decode('utf-8')))
            break
        except Exception:
            print('connection error')
            time.sleep(3)

    for message in consumer:
        print(message.value)

        tp = TopicPartition(message.topic, message.partition)
        # commit offset + 1: the committed offset marks the next message to consume
        offsets = {tp: OffsetAndMetadata(message.offset + 1, '')}
        consumer.commit(offsets=offsets)
Example #9
 async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Retrieving offsets for %s", internal_name)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     response = {"offsets": []}
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for el in request_data["partitions"]:
             convert_to_int(el, "partition", content_type)
             tp = TopicPartition(el["topic"], el["partition"])
             commit_info = consumer.committed(tp, metadata=True)
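             # committed() returns None for partitions with no committed offset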
             if not commit_info:
                 continue
             response["offsets"].append({
                 "topic": tp.topic,
                 "partition": tp.partition,
                 "metadata": commit_info.metadata,
                 "offset": commit_info.offset
             })
     KarapaceBase.r(body=response, content_type=content_type)
Example #10
def test__handle_offset_response(fetcher, mocker):
    # Broker returns UnsupportedForMessageFormatError, will omit partition
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 43, -1, -1)]),
        ("topic", [(1, 0, 1000, 9999)])
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.succeeded()
    assert fut.value == {TopicPartition("topic", 1): (9999, 1000)}

    # Broker returns NotLeaderForPartitionError
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 6, -1, -1)]),
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.failed()
    assert isinstance(fut.exception, NotLeaderForPartitionError)

    # Broker returns UnknownTopicOrPartitionError
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 3, -1, -1)]),
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.failed()
    assert isinstance(fut.exception, UnknownTopicOrPartitionError)

    # Broker returns many errors and 1 result
    # Will fail on 1st error and return
    fut = Future()
    res = OffsetResponse[1]([
        ("topic", [(0, 43, -1, -1)]),
        ("topic", [(1, 6, -1, -1)]),
        ("topic", [(2, 3, -1, -1)]),
        ("topic", [(3, 0, 1000, 9999)])
    ])
    fetcher._handle_offset_response(fut, res)
    assert fut.failed()
    assert isinstance(fut.exception, NotLeaderForPartitionError)
Example #11
    def bash_consumer(self):
        self.consumer.assign([TopicPartition(topic=topic, partition=0)])

        for msg in self.consumer:
            msg_offset = msg.offset
            msg_value = (msg.value).decode("utf-8")
            msg_value = json.loads(msg_value)

            self.save_list.append(msg_value)

            save_data_size = sys.getsizeof(self.save_list)

            if save_data_size >= single_file_size * 1024 * 1024:
                save_file = self.log_file_path + "bash_history_log_" + str(
                    self.count)

                self.count += 1
                with open(save_file, "w+") as f:
                    for line in self.save_list:
                        # entries are parsed dicts, so serialize back to JSON
                        f.write(json.dumps(line) + "\n")

                # only clear the buffer after it has been written out
                self.save_list = []
                print("*" * 10, "writing file", "*" * 10)

            if save_to_tb:
                if self.tb_empty:
                    add_way = "replace"
                    self.tb_empty = False
                else:
                    add_way = "append"

                for k, v in msg_value.items():
                    msg_value[k] = [v]
                insert_df = pd.DataFrame.from_dict(msg_value)

                insert_df.to_sql(dbh_config["table"],
                                 self.dbh,
                                 if_exists=add_way,
                                 index=False)

            print("当前size", save_data_size, "\t", msg_value)
Example #12
    def run(self):
        consumer = KafkaConsumer(
            bootstrap_servers=f'{self.remote_ip}:{self.remote_port}',
            api_version=settings.KAFKA_API_VERSION,
            auto_offset_reset='earliest',
            group_id="tweet_consumer")
        consumer.subscribe([settings.KAFKA_TOPIC_TWEETS])

        try:
            while True:
                for message in consumer:
                    decoded_message = str(message.value.decode('utf-8'))
                    tractor.prepare_tweet_and_push_to_elastic(decoded_message)

                    tp = TopicPartition(message.topic, message.partition)
                    # commit offset + 1 so a restart resumes after this message
                    oem = OffsetAndMetadata(message.offset + 1, '')
                    consumer.commit({tp: oem})
        finally:
            consumer.close()
Example #13
    def open(self):
        super().initialize()

        self._consumer = KafkaConsumer(
            bootstrap_servers=self._bootstrap_servers,
            consumer_timeout_ms=self._timeout)

        # point to what we want at
        partition = TopicPartition(self._topic, self._partition)
        self._consumer.assign([partition])

        offset = None
        if self._begin_time:
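            # offsets_for_times() takes timestamps in epoch milliseconds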
            timestamp = self._begin_time * 1000
            offinfo = self._consumer.offsets_for_times({partition: timestamp})
            if offinfo is None or offinfo[partition] is None:
                raise ValueError(
                    "There is no data in the threat feed stream after the"
                    " given begin date")
            offset = offinfo[partition].offset
            self._consumer.seek(partition, offset)
Example #14
    def test_load_metadata(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_1', [
                (NO_ERROR, 0, 1, [1, 2], [1, 2])
            ]),
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
            (NO_LEADER, 'topic_no_partitions', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
            (NO_ERROR, 'topic_3', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1]),
                (NO_ERROR, 1, 1, [1, 0], [1, 0]),
                (NO_ERROR, 2, 0, [0, 1], [0, 1])
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        # client loads metadata at init
        client = SimpleClient(hosts=['broker_1:4567'])
        self.assertDictEqual({
            TopicPartition('topic_1', 0): brokers[1],
            TopicPartition('topic_noleader', 0): None,
            TopicPartition('topic_noleader', 1): None,
            TopicPartition('topic_3', 0): brokers[0],
            TopicPartition('topic_3', 1): brokers[1],
            TopicPartition('topic_3', 2): brokers[0]},
            client.topics_to_brokers)

        # if we ask for metadata explicitly, it should raise errors
        with self.assertRaises(LeaderNotAvailableError):
            client.load_metadata_for_topics('topic_no_partitions')

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.load_metadata_for_topics('topic_unknown')

        # This should not raise
        client.load_metadata_for_topics('topic_noleader')
Example #15
def test_partition_records_offset():
    """Test that compressed messagesets are handled correctly
    when the fetch offset is in the middle of the message list
    """
    batch_start = 120
    batch_end = 130
    fetch_offset = 123
    tp = TopicPartition('foo', 0)
    messages = [ConsumerRecord(tp.topic, tp.partition, i,
                               None, None, 'key', 'value', [], 'checksum', 0, 0, -1)
                for i in range(batch_start, batch_end)]
    records = Fetcher.PartitionRecords(fetch_offset, None, messages)
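    # records 120..122, which precede fetch_offset, should be skipped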
    assert len(records) > 0
    msgs = records.take(1)
    assert msgs[0].offset == fetch_offset
    assert records.fetch_offset == fetch_offset + 1
    msgs = records.take(2)
    assert len(msgs) == 2
    assert len(records) > 0
    records.discard()
    assert len(records) == 0
Example #16
    def test_with_limited_retries(self):

        # lets create a queue and add 10 messages for 10 different partitions
        # to show how retries should work ideally
        for i in range(10):
            self.queue.put((TopicPartition("test", i), "msg %i" % i, "key %i" % i))

        def send_side_effect(reqs, *args, **kwargs):
            return [FailedPayloadsError(req) for req in reqs]

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(3, 3)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 16 non-void calls:
        # 3 initial batches of 3 msgs each + 1 initial batch of 1 msg +
        # 3 retries of the batches above = (1 + 3 retries) * 4 batches = 16
        self.assertEqual(self.client.send_produce_request.call_count, 16)
Example #17
    def parse_member_metadata(cls, metadata):
        """
        Parses member metadata into a python object.
        This implementation only serializes and deserializes the StickyAssignorMemberMetadataV1 user data,
        since no StickyAssignor written in Python was ever deployed in the wild with version V0, meaning
        that there is no need to support backward compatibility with V0.

        Arguments:
          metadata (MemberMetadata): decoded metadata for a member of the group.

        Returns:
          parsed metadata (StickyAssignorMemberMetadataV1)
        """
        user_data = metadata.user_data
        if not user_data:
            return StickyAssignorMemberMetadataV1(
                partitions=[],
                generation=cls.DEFAULT_GENERATION_ID,
                subscription=metadata.subscription)

        try:
            decoded_user_data = StickyAssignorUserDataV1.decode(user_data)
        except Exception as e:
            # ignore the consumer's previous assignment if it cannot be parsed
            log.error("Could not parse member data: %s", e)
            return StickyAssignorMemberMetadataV1(
                partitions=[],
                generation=cls.DEFAULT_GENERATION_ID,
                subscription=metadata.subscription)

        member_partitions = []
        for topic, partitions in decoded_user_data.previous_assignment:  # pylint: disable=no-member
            member_partitions.extend(
                [TopicPartition(topic, partition) for partition in partitions])
        return StickyAssignorMemberMetadataV1(
            # pylint: disable=no-member
            partitions=member_partitions,
            generation=decoded_user_data.generation,
            subscription=metadata.subscription)
Example #18
    async def wait_no_lag(self):
        print("Ensuring no lag")
        consumer_group = "f-simple"
        client = self.client
        await self.client.bootstrap()
        source = self.source
        source_builder = self._source_builder

        source_highwaters = source_builder._highwaters()
        source_tps = source_builder._assignment
        protocol_tps = [(source, [tp.partition for tp in source_tps])]

        node_id = next(broker.nodeId for broker in client.cluster.brokers())
        coordinator_request = GroupCoordinatorRequest_v0(consumer_group)
        coordinator_response = await client.send(node_id, coordinator_request)
        coordinator_id = coordinator_response.coordinator_id

        while True:
            consumer_offsets_req = OffsetFetchRequest_v1(
                consumer_group, protocol_tps)
            consumer_offsets_resp = await client.send(coordinator_id,
                                                      consumer_offsets_req)
            topics = consumer_offsets_resp.topics
            assert len(topics) == 1, f"{topics!r}"
            topic, partition_resps = topics[0]
            assert topic == source, f"{source}"
            assert len(partition_resps) == len(source_tps)

            # + 1 is to account for the difference in how faust commits
            positions = {
                TopicPartition(topic=source, partition=partition): offset + 1
                for partition, offset, _, _ in partition_resps
            }

            if positions != source_highwaters:
                print("There is lag. Waiting!")
                await asyncio.sleep(2.0)
            else:
                return
Example #19
def topic_offsets(kafka_brokers, topic):
    client = SimpleClient(insure_is_array(kafka_brokers))
    topic_partitions = client.topic_partitions
    if topic not in topic_partitions:
        raise KafkaException("topic {} doesn't exist".format(topic))
    partitions = topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]
    offsets_responses = client.send_offset_request(offset_requests)
    client.close()
    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic == topic:
            topic_offset = 0
            topic_partition = TopicPartition(topic=offset.topic,
                                             partition=offset.partition)
            if offset.offsets[0]:
                topic_offset = offset.offsets[0]
            partitions_and_offsets[topic_partition] = topic_offset

    return partitions_and_offsets
Example #20
    def _get_consumer(self):
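        # pop the defaults out of consumer_params so the remaining params can
        # be passed through **self.consumer_params without duplicate keywords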
        enable_auto_commit = self.consumer_params.pop("enable_auto_commit",
                                                      False)
        auto_offset_reset = self.consumer_params.pop("auto_offset_reset",
                                                     "earliest")
        consumer_timeout_ms = self.consumer_params.pop("consumer_timeout_ms",
                                                       10 * 1000)
        max_poll_records = self.consumer_params.pop("max_poll_records", 1)
        value_deserializer = self.consumer_params.pop(
            "value_deserializer", lambda x: json.loads(x.decode("utf-8")))

        consumer = KafkaConsumer(group_id=self.group_id,
                                 bootstrap_servers=self.bootstrap_servers,
                                 consumer_timeout_ms=consumer_timeout_ms,
                                 enable_auto_commit=enable_auto_commit,
                                 auto_offset_reset=auto_offset_reset,
                                 max_poll_records=max_poll_records,
                                 value_deserializer=value_deserializer,
                                 **self.consumer_params)
        consumer.assign([TopicPartition(self.topic, self.partition)])

        return consumer
Example #21
def test_fetch_committed_offsets(mocker, coordinator):

    # No partitions, no IO polling
    mocker.patch.object(coordinator._client, 'poll')
    assert coordinator.fetch_committed_offsets([]) == {}
    assert coordinator._client.poll.call_count == 0

    # general case -- send offset fetch request, get successful future
    mocker.patch.object(coordinator, 'ensure_coordinator_known')
    mocker.patch.object(coordinator,
                        '_send_offset_fetch_request',
                        return_value=Future().success('foobar'))
    partitions = [TopicPartition('foobar', 0)]
    ret = coordinator.fetch_committed_offsets(partitions)
    assert ret == 'foobar'
    coordinator._send_offset_fetch_request.assert_called_with(partitions)
    assert coordinator._client.poll.call_count == 1

    # Failed future is raised if not retriable
    coordinator._send_offset_fetch_request.return_value = Future().failure(
        AssertionError)
    coordinator._client.poll.reset_mock()
    try:
        coordinator.fetch_committed_offsets(partitions)
    except AssertionError:
        pass
    else:
        assert False, 'Exception not raised when expected'
    assert coordinator._client.poll.call_count == 1

    coordinator._client.poll.reset_mock()
    coordinator._send_offset_fetch_request.side_effect = [
        Future().failure(Errors.RequestTimedOutError),
        Future().success('fizzbuzz')
    ]

    ret = coordinator.fetch_committed_offsets(partitions)
    assert ret == 'fizzbuzz'
    assert coordinator._client.poll.call_count == 2  # call + retry
Example #22
    def test_async_producer_not_leader(self):

        for i in range(10):
            self.queue.put((TopicPartition("test", i), "msg %i", "key %i"))

        # Mock offsets counter for closure
        offsets = collections.defaultdict(
            lambda: collections.defaultdict(lambda: 0))
        self.client.is_first_time = True

        def send_side_effect(reqs, *args, **kwargs):
            if self.client.is_first_time:
                self.client.is_first_time = False
                return [
                    ProduceResponsePayload(req.topic, req.partition,
                                           NotLeaderForPartitionError.errno,
                                           -1) for req in reqs
                ]

            responses = []
            for req in reqs:
                offset = offsets[req.topic][req.partition]
                offsets[req.topic][req.partition] += len(req.messages)
                responses.append(
                    ProduceResponsePayload(req.topic, req.partition, 0,
                                           offset))
            return responses

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(2)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 5 non-void calls: 1st failed batch of 3 msgs
        # + 3 batches of 3 msgs each + 1 batch of 1 msg = 1 + 3 + 1 = 5
        self.assertEqual(self.client.send_produce_request.call_count, 5)
Example #23
 def setupTable(self, manku_motwani, topic_name, minutes=1440):
     self.consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
     self.tp = TopicPartition(topic_name, 0)
     self.cur_offset = self.consumer.end_offsets([self.tp])
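     # end_offsets() returns a dict mapping TopicPartition -> log-end offset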
     current_time = datetime.datetime.now()
     old_time = current_time - datetime.timedelta(minutes=minutes)
     old_epoch_ts = int(old_time.timestamp() * 1000)  # in milliseconds
     self.old_offsets = self.consumer.offsets_for_times(
         {self.tp: old_epoch_ts})
     self.consumer.assign([self.tp])
     self.consumer.seek(self.tp, int(self.old_offsets[self.tp].offset))
     print("StartOffset: ", int(self.old_offsets[self.tp].offset),
           " EndOffset: ", int(self.cur_offset[self.tp]))
     number_of_msg_in_stream = int(self.cur_offset[self.tp]) - int(
         self.old_offsets[self.tp].offset)
     print("Count of Messages: ", number_of_msg_in_stream)
     for message in self.consumer:
         if int(message.offset) >= int(self.cur_offset[self.tp]):
             break
         r_msg = str(message.value.decode("utf-8"))
         tweet_text = json.loads(r_msg)
         manku_motwani.add(tweet_text)
     return None
Example #24
    def __init__(self, topics, bootstrap_servers, serviceMongodb):
        self.topics = topics
        self.bootstrap_servers = bootstrap_servers
        self.serviceMongodb = serviceMongodb
        # establish the connection
        try:
            self.kc = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
                                    group_id="group_" + str(topics))
            tp = TopicPartition(topics, 0)
            self.kc.assign([tp])

            # restore the position we stopped at last time
            lastPos = serviceMongodb.getDataByTopics("test")
            if lastPos is None:
                offset = 0
                serviceMongodb.saveData(topics=topics, offset=0)
            else:
                offset = lastPos["offset"] + 1

            self.kc.seek(partition=tp, offset=offset)
            app.logger.info("kafka server: " + bootstrap_servers + ": connected successfully" +
                            " topics:" + self.topics + " offset:" + str(offset))

        except Exception as e:
            app.logger.error("kafka server: " + bootstrap_servers + ": connection failed" +
                             " topics:" + self.topics + " " + str(e.args))
Example #25
def get_last_block() -> dict:
    """
    Fetch the last message from Kafka's block_topic
    :return:
    """
    topic = config.get('kafka', 'block_topic', fallback=None)
    if not topic:
        return {}
    logger.debug(f'Searching for data in {topic}')
    consumer = kafka_consumer(group_id='monitor_block')
    partitions = [
        TopicPartition(topic, p) for p in consumer.partitions_for_topic(topic)
    ]
    last_offset_per_partition = consumer.end_offsets(partitions)
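    # pick the highest-numbered partition and its end offset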
    max_partition, max_offset = sorted(last_offset_per_partition.items(),
                                       key=lambda x: x[0])[-1]
    if max_offset > 0:
        consumer.assign([max_partition])
        consumer.seek(max_partition, offset=max_offset - 1)
        msg = next(consumer)
        logger.debug(f'The latest message in {topic} is {msg.value}')
        return msg.value
    return {}
Example #26
 def __init__(self):
     # topic
     self.topic_name = 'auction_similary_test'
     self.tagname = 'auction_similary_tag_ids_test'
     self.dictionary_path = 'deerwester_test.dict'
     # mysql
     self.conn = pymysql.Connect(**self.MYSQL_ASSET_TEST)
     self.cursor = self.conn.cursor(pymysql.cursors.DictCursor)
     # redis
     self.redis_conn = redis.Redis(**self.ONLINE_REDIS_PARAM)
     self.redis_key = 'auction_similary:' + self.topic_name
     date_str = str(datetime.date.today())
     self.redis_tag_key = self.tagname + ':' + date_str
     # kafka
     self.consumer = KafkaConsumer(group_id='group_similary',
                                   bootstrap_servers=self.KAFKA_SOCK)
     self.partition = TopicPartition(topic=self.topic_name, partition=0)
     self.consumer.assign([self.partition])
     self.record_one_day = True
     self.SOURCE_ID_LIMIT = (1, 3, 4, 5, 6, 10857, 10873)
     # logging
     logging.warning('{} {}'.format(
         self.get_now_datetime('%Y-%m-%d %H:%M:%S'), 'Yuancheng similarity analysis v1.6'))
Example #27
def on_message(client, userdata, msg):
    global counter
    counter += 1
    print(f"Received message No: {counter}")

    parsed_msg = json.loads(msg.payload.decode('utf-8'))

    try:
        if parsed_msg['action'] == "RUN":
            kafka_consumer.run()

        if parsed_msg['action'] == "COMMIT":
            # parsed_msg comes from json.loads, so use dict indexing
            # rather than attribute access
            commit_offsets = parsed_msg['kafka_commit_offsets']
            tp = TopicPartition(
                commit_offsets['topic'],
                commit_offsets['message']['partition'],
            )
            oem = OffsetAndMetadata(commit_offsets['offset'],
                                    commit_offsets['metadata'])
            offsets = {tp: oem}  # commit() expects a dict, not the set {tp, oem}
            kafka_consumer.commit(offsets)
    except Exception as ex:
        print(ex)
        pass
Example #28
    def run(self):
        for message in self.consumer:
            print('%s:%d:%d: received' % (
                message.topic, message.partition, message.offset))
            try:
                print(message.value)
                self.__handle_action(message)
            except KeyboardInterrupt:
                print('Stopped')
            except UTimeoutError:
                print('timeout')
            except Exception as error:
                print(error)

            if not self.auto_commit:
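                # commit metadata must be an opaque string; committing
                # message.offset + 1 marks this message as processed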
                partition = TopicPartition(message.topic, message.partition)
                offset = OffsetAndMetadata(message.offset + 1, '')
                options = {partition: offset}
                self.consumer.commit(options)

            print('%s:%d:%d: committed\n' % (
                message.topic, message.partition, message.offset))
Example #29
    def test_kafka_consumer__blocking(self):
        TIMEOUT_MS = 500
        consumer = self.kafka_consumer(auto_offset_reset='earliest',
                                       enable_auto_commit=False,
                                       consumer_timeout_ms=TIMEOUT_MS)

        # Manual assignment avoids overhead of consumer group mgmt
        consumer.unsubscribe()
        consumer.assign([TopicPartition(self.topic, 0)])

        # Ask for a message; nothing in queue, so block 500ms
        with Timer() as t:
            with self.assertRaises(StopIteration):
                msg = next(consumer)
        self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0)

        self.send_messages(0, range(0, 10))

        # Ask for 5 messages, 10 in queue. Get 5 back, no blocking
        messages = set()
        with Timer() as t:
            for i in range(5):
                msg = next(consumer)
                messages.add((msg.partition, msg.offset))
        self.assertEqual(len(messages), 5)
        self.assertLess(t.interval, TIMEOUT_MS / 1000.0)

        # Ask for 10 messages, get 5 back, block 500ms
        messages = set()
        with Timer() as t:
            with self.assertRaises(StopIteration):
                for i in range(10):
                    msg = next(consumer)
                    messages.add((msg.partition, msg.offset))
        self.assertEqual(len(messages), 5)
        self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0)
        consumer.close()
Example #30
 def _handle_offset_fetch_response(self, future, response):
     offsets = {}
     for topic, partitions in response.topics:
         for partition, offset, metadata, error_code in partitions:
             tp = TopicPartition(topic, partition)
             error_type = Errors.for_code(error_code)
             if error_type is not Errors.NoError:
                 error = error_type()
                 log.debug(
                     "Group %s failed to fetch offset for partition"
                     " %s: %s", self.group_id, tp, error)
                 if error_type is Errors.GroupLoadInProgressError:
                     # just retry
                     future.failure(error)
                 elif error_type is Errors.NotCoordinatorForGroupError:
                     # re-discover the coordinator and retry
                     self.coordinator_dead(error_type())
                     future.failure(error)
                 elif error_type is Errors.UnknownTopicOrPartitionError:
                     log.warning(
                         "OffsetFetchRequest -- unknown topic %s"
                         " (have you committed any offsets yet?)", topic)
                     continue
                 else:
                     log.error("Unknown error fetching offsets for %s: %s",
                               tp, error)
                     future.failure(error)
                 return
             elif offset >= 0:
                 # record the position with the offset
                 # (-1 indicates no committed offset to fetch)
                 offsets[tp] = OffsetAndMetadata(offset, metadata)
             else:
                 log.debug(
                     "Group %s has no committed offset for partition"
                     " %s", self.group_id, tp)
     future.success(offsets)