def get_kafka_old_offset(topic, kafka_broker, partition_count):
    '''Get the oldest Kafka offsets, used to compare against the offsets later read by batch_loader.'''
    kafka_old_offset = {}
    kafka_new_offset = {}

    try:
        # Alternative approach using the kafka library:
        '''
        from kafka import SimpleClient
        from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
        from kafka.common import OffsetRequestPayload
        client = SimpleClient(broker_list)
        partitions = client.topic_partitions[topic]
        offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]
        offsets_responses = client.send_offset_request(offset_requests)
        for r in offsets_responses:
            #print("partition = %s, offset = %s"%(r.partition, r.offsets[0]))
            kafka_old_offset[r.partition] = r.offsets[0]
        '''
        from confluent_kafka import TopicPartition, Consumer, KafkaException
        from confluent_kafka.admin import AdminClient

        conf = {'bootstrap.servers': kafka_broker, 'session.timeout.ms': 6000}
        try:
            admin_client = AdminClient(conf)
            consumer_client = Consumer(conf)

            md = admin_client.list_topics(timeout=10)
            for t in iter(md.topics.values()):
                if str(t) == topic:
                    for p in iter(t.partitions.values()):
                        td = TopicPartition(str(t), p.id)
                        oldest_offset, newest_offset = consumer_client.get_watermark_offsets(
                            td)
                        kafka_old_offset[p.id] = oldest_offset
                        kafka_new_offset[p.id] = newest_offset
        except KafkaException as e:
            logger.error("请检查kafka是否存活:%s" % e)
    except ImportError:
        for partition_id in range(partition_count):
            command = 'kafka-run-class kafka.tools.GetOffsetShell --topic %s --broker-list %s --time -2 --partition %d' % (
                topic, kafka_broker, partition_id)
            args = shlex.split(command)
            process = subprocess.Popen(args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            output = '{}'.format(
                process.stdout.read().decode(encoding='UTF-8'))
            offset = output.split(':')[2]
            kafka_old_offset[partition_id] = int(offset)

    return kafka_old_offset
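A minimal usage sketch for the function above; the broker address, topic name, and partition count are assumptions:

# Hypothetical call site: compare the oldest retained offsets with offsets recorded elsewhere.
oldest_offsets = get_kafka_old_offset('my_topic', 'localhost:9092', partition_count=3)
for partition_id, offset in sorted(oldest_offsets.items()):
    print("partition = %s, oldest offset = %s" % (partition_id, offset))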
Example #2
def test_delivery_report_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file name
        data (object): data to be serialized
        record_type (str): type of the test data (e.g. "record" or "float")

    Raises:
        AssertionError on test failure

    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()
    schema_str = load_avsc(avsc)

    value_serializer = AvroSerializer(sr, schema_str)

    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        actual = value_deserializer(SerializationContext(topic,
                                                         MessageField.VALUE),
                                    msg.value())

        if record_type == "record":
            assert all([v == actual[k] for k, v in data.items()])
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # schema may include default which need not exist in the original
    if record_type == 'record':
        assert all([v == actual[k] for k, v in data.items()])
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
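The test above relies on cluster fixtures; a minimal standalone sketch of the same serializer/deserializer round trip, assuming a Schema Registry at http://localhost:8081 and a hypothetical schema:

# Standalone sketch (not part of the test fixture); the registry URL and schema are assumptions.
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer, AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField

schema_str = '{"type": "record", "name": "User", "fields": [{"name": "name", "type": "string"}]}'
sr = SchemaRegistryClient({'url': 'http://localhost:8081'})
serializer = AvroSerializer(sr, schema_str)
deserializer = AvroDeserializer(sr, schema_str)

ctx = SerializationContext("users", MessageField.VALUE)
payload = serializer({"name": "alice"}, ctx)           # bytes in the Confluent wire format
assert deserializer(payload, ctx) == {"name": "alice"}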
Example #3
def listen_for_messages(msg, consumer, application_source_id):  # noqa: C901
    """
    Listen for Platform-Sources kafka messages.

    Args:
        msg (Message): the Platform-Sources Kafka message to process
        consumer (Consumer): Kafka consumer object
        application_source_id (Integer): Cost Management's current Application Source ID. Used for
            kafka message filtering.

    Returns:
        None

    """
    try:
        try:
            msg = get_sources_msg_data(msg, application_source_id)
            offset = msg.get("offset")
            partition = msg.get("partition")
        except SourcesMessageError:
            return
        if msg:
            LOG.info(
                f"Processing message offset: {offset} partition: {partition}")
            topic_partition = TopicPartition(topic=Config.SOURCES_TOPIC,
                                             partition=partition,
                                             offset=offset)
            LOG.info(f"Cost Management Message to process: {str(msg)}")
            try:
                with transaction.atomic():
                    process_message(application_source_id, msg)
                    consumer.commit()
            except (InterfaceError, OperationalError) as err:
                close_and_set_db_connection()
                LOG.error(f"{type(err).__name__}: {err}")
                rewind_consumer_to_retry(consumer, topic_partition)
            except (IntegrityError, SourcesHTTPClientError) as err:
                LOG.error(f"{type(err).__name__}: {err}")
                rewind_consumer_to_retry(consumer, topic_partition)
            except SourceNotFoundError:
                LOG.warning(
                    f"Source not found in platform sources. Skipping msg: {msg}"
                )
                consumer.commit()

    except KafkaError as error:
        LOG.error(
            f"[listen_for_messages] Kafka error encountered: {type(error).__name__}: {error}",
            exc_info=True)
    except Exception as error:
        LOG.error(
            f"[listen_for_messages] UNKNOWN error encountered: {type(error).__name__}: {error}",
            exc_info=True)
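rewind_consumer_to_retry and close_and_set_db_connection are project helpers not shown in the snippet; a plausible minimal sketch of the rewind helper, purely as an assumption:

# Hypothetical helper (not in the snippet): back off briefly, then re-seek the consumer so
# the failed message is delivered again on the next poll.
import time

def rewind_consumer_to_retry(consumer, topic_partition):
    time.sleep(5)                   # assumed retry pause
    consumer.seek(topic_partition)  # reposition to the offset that failed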
Example #4
    def read_from_offset(self, offset=1000):
        c = AvroConsumer(
            dict(
                self.base_config, **{
                    'group.id': 'groupid-1',
                    'default.topic.config': {
                        'auto.offset.reset': 'beginning',
                        'auto.commit.enable': 'false'
                    }
                }))

        c.assign([TopicPartition(self.topic, partition=0, offset=offset)])
        return self.run_loop(c, return_message=True, file_object=False)
Example #5
    def _subscribe(self):
        """
        Subscribe to Kafka topics.

        A workaround for missing Zookeeper support in confluent-python is required here.
        Automatic partition rebalancing is not working with Kafka Versions < 0.9.0.
        Therefore we manually assign the partitions to the consumer for legacy Kafka versions.
        """
        if self.broker_version < self.KAFKA_VERSION_ZOOKEEPER_OPTIONAL:
            self.consumer.assign(
                [TopicPartition(self.topic, p) for p in range(0, 10)])
        else:
            self.consumer.subscribe([self.topic])
Example #6
    def __reset_pos(self):
        logger = logging.getLogger()

        if self.type == "None":
            return

        parts = [TopicPartition(self.topic, 0)]
        (start, end) = self.cons.get_watermark_offsets(parts[0])
        logger.debug("Currently at {}/{} offset <{}, {}>".format(
            parts[0].topic, parts[0].partition, start, end))
        if end > 0:
            parts[0].offset = end - 1
            self.cons.seek(parts[0])
Example #7
    def getTPOs(self, topics):
        """Use the AdminAPI to return a list of TopicParition objects for a list of topics
    """

        self.logger.info(
            f"Getting TPOs for {len(topics)} topics via admin API...")
        tpos = []
        for t in topics:
            for p in self._admin.list_topics(t).topics[t].partitions:
                tpos.append(TopicPartition(t, p))

        self.logger.info(f"Found {len(tpos)} TPOs for {len(topics)} topics.")
        return tpos
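From inside the same class, the returned list can be passed straight to Consumer.committed(); self._consumer and the topic names below are assumptions:

# Hypothetical usage of getTPOs(); assumes the class also holds a confluent_kafka Consumer.
tpos = self.getTPOs(["topic-a", "topic-b"])
for tp in self._consumer.committed(tpos, timeout=10):
    self.logger.info(f"{tp.topic}[{tp.partition}] committed offset: {tp.offset}")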
Example #8
    def prepareCommit(self, message):
        topic = message.topic()
        partition = message.partition()
        offset = message.offset()
        key = message.key()

        cacheKey = topic + '_' + str(partition)

        if cacheKey in self.commitCache:
            self.commitCache[cacheKey].offset = offset
        else:
            self.commitCache[cacheKey] = TopicPartition(
                topic, partition, offset)
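The method above only fills the cache; a hypothetical companion method that flushes it could look like the sketch below. Note that Kafka expects the committed offset to be the next offset to consume, hence the +1 (the same convention Example #25 uses). self.consumer is an assumption.

    # Hypothetical flush step (not in the snippet).
    def flushCommit(self):
        if self.commitCache:
            offsets = [TopicPartition(tp.topic, tp.partition, tp.offset + 1)
                       for tp in self.commitCache.values()]
            self.consumer.commit(offsets=offsets, asynchronous=False)
            self.commitCache.clear()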
Example #9
    def commit_offsets(self, partitions=None):
        if self.offsets and self.consumer:
            if partitions is None:
                partitions = self.offsets.keys()
            to_commit = []
            for partition in partitions:
                offset = self.offsets.get(partition)
                if offset is None:
                    # Skip partitions that have no offset
                    continue
                to_commit.append(TopicPartition(self.topic, partition, offset))

            self.consumer.commit(offsets=to_commit)
Example #10
def morning_notice():
	#(rise_ratio_list_smallest, rise_ratio_list_largest) = consumer.get_watermark_offsets(TopicPartition('eastmoney', 0))
	(volume_list_smallest, volume_list_largest) = consumer.get_watermark_offsets(TopicPartition('eastmoney', 0))
	try:
		#consumer.assign([TopicPartition('eastmoney', 0, rise_ratio_list_largest-1)])
		#consumer.seek(TopicPartition('eastmoney', 0, rise_ratio_list_largest-1))
		# consumer.seek(TopicPartition('eastmoney', 0, rise_ratio_list_largest-1))
		# latest_rise_ratio = json.loads(consumer.poll(1.0).value())["data"]
		#latest_rise_ratio = pd.read_json(json.loads(consumer.poll(1.0).value())["data"]).sort_index()
		#latest_rise_ratio["涨幅%"] = latest_rise_ratio["涨幅%"].map(lambda x: float(x.replace('----', '0.00')))
		#print(latest_rise_ratio.head(10))
		last_data_point = volume_list_largest-1
		consumer.assign([TopicPartition('eastmoney', 0, last_data_point)])
		consumer.seek(TopicPartition('eastmoney', 0, last_data_point))
		all_data = json.loads(consumer.poll(3.0).value())
		latest_volume = pd.read_json(all_data["data"]).sort_index()
		latest_volume["涨幅%"] = latest_volume["涨幅%"].map(lambda x: float(x.replace('----', '0.00')))
		result = latest_volume.head(100).sort_values("涨幅%", ascending = False).head(20)
		print("| 当前时间: " + time.strftime("%Y-%m-%d %H:%M:%S") + " | " + "数据更新时间: " + time.strftime("%Y-%m-%d %H:%M:%S" ,time.localtime(all_data["timestamp"]/1000000000)) + " |")
		print(tabulate(result, headers='keys', tablefmt='psql'))
	finally:
		consumer.close()
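The module-level consumer used by morning_notice() is not shown; a plausible configuration, with the broker address and group id as assumptions:

# Hypothetical module-level consumer assumed by morning_notice(); broker address is an assumption.
from confluent_kafka import Consumer

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'eastmoney-notice',
    'auto.offset.reset': 'latest',
})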
Example #11
        def assignment_callback(consumer, assignment):
            # Since ``auto.offset.reset`` is set to ``error`` to force human
            # interaction on an offset reset, we have to explicitly specify the
            # starting offset if no offset has been committed for this topic during
            # the ``__consumer_offsets`` topic retention period.
            assignment = {(i.topic, i.partition): self.__positions.get(
                (i.topic, i.partition))
                          for i in assignment}

            for i in self.__consumer.committed([
                    TopicPartition(topic, partition)
                    for (topic, partition), offset in assignment.items()
                    if offset is None
            ]):
                k = (i.topic, i.partition)
                if i.offset > -1:
                    assignment[k] = i.offset
                else:
                    assignment[k] = self.initial_offset_reset(
                        consumer, i.topic, i.partition)

            self.__consumer.assign([
                TopicPartition(topic, partition, offset)
                for (topic, partition), offset in assignment.items()
            ])

            for (topic, partition), offset in assignment.items():
                # Setting the local offsets will either cause the partition to be
                # paused (if the remote offset is unknown or the local offset is
                # not trailing the remote offset) or resumed.
                self.__partition_state_manager.set_local_offset(
                    topic, partition, offset)
                self.__positions[(topic, partition)] = offset

            if on_assign is not None:
                on_assign(self, [
                    TopicPartition(topic, partition)
                    for topic, partition in assignment.keys()
                ])
def test_calling_store_offsets_after_close_throws_error():
    """ calling store_offsets after close should throw RuntimeError """

    c = Consumer({
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100
    })

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.offsets_for_times([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)
Example #13
    def __init__(self,
                 my_id=1,
                 bootstrap_servers='',
                 list_of_partitions=[],
                 request_topic='',
                 inference_topic='',
                 group_id='my_grp'):
        """ Constructor
        :type interval: int
        :param interval: Check interval, in seconds
        """
        self.model = None
        #Create the model instance here
        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers

        self.tls = []
        x = 0
        for i in list_of_partitions:
            self.tls.insert(x, TopicPartition(self.t, i))
            x = x + 1
        #self.tls=list_of_partitions
        print(self.tls)
        conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
            # 'key.serializer': StringSerializer('utf_8'),
            # 'value.serializer': StringSerializer('utf_8'),
            'client.id': 'test-sw-1'
        }

        self.producer = Producer(conf)
        conf = {
            'bootstrap.servers': bootstrap_servers,
            #'sasl.mechanism': 'PLAIN',
            'sasl.username': '******',
            'sasl.password': '******',
            'ssl.ca.location': '/tmp/cacert.pem',
            'group.id': group_id,
            'auto.offset.reset': 'smallest'
        }
        self.consumer = Consumer(conf)
        self.consumer.assign(self.tls)
def test_sort():
    """ TopicPartition sorting (rich comparator) """

    # sorting uses the comparator
    correct = [TopicPartition('topic1', 3),
               TopicPartition('topic3', 0),
               TopicPartition('topicA', 5),
               TopicPartition('topicA', 5)]

    tps = sorted([TopicPartition('topicA', 5),
                  TopicPartition('topic3', 0),
                  TopicPartition('topicA', 5),
                  TopicPartition('topic1', 3)])

    assert correct == tps
    def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=[], request_topic='', inference_topic='', group_id='my_grp'):
        """ Constructor
        :type interval: int
        :param interval: Check interval, in seconds
        """
        self.model = tree.HoeffdingTreeClassifier(max_depth=10) 
            # compose.Pipeline(
            # preprocessing.MinMaxScaler(),
            # anomaly.HalfSpaceTrees(seed=42)) 
        self.metric = metrics.ROCAUC() # metrics.Accuracy() # 
        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers
#         self.list_of_partitions = list_of_partitions

        self.tls = []
        x = 0
        for i in list_of_partitions:
            self.tls.insert(x, TopicPartition(self.t, i))
            x = x+1
        #self.tls=list_of_partitions
        print(self.tls)
        
        conf = {'bootstrap.servers': bootstrap_servers,
                'sasl.mechanism': 'PLAIN',
                'security.protocol': 'SASL_SSL',
                'ssl.ca.location': '/tmp/cacert.pem',
                'sasl.username': '******',
                'sasl.password': '******',
#                 'sasl.username': '******',
#                 'sasl.password': '******',
                # 'key.serializer': StringSerializer('utf_8'),
                # 'value.serializer': StringSerializer('utf_8'),
                
                'client.id': 'test-sw-1'}
        
        self.producer = Producer(conf)
        conf = {'bootstrap.servers': bootstrap_servers,
                'sasl.mechanism': 'PLAIN',
                'security.protocol': 'SASL_SSL',
                'sasl.username': '******',
                'sasl.password': '******',
                'ssl.ca.location': '/tmp/cacert.pem',
                'group.id': group_id,
                'auto.offset.reset': 'latest'}
        self.consumer = Consumer(conf)
        self.consumer.assign(self.tls)
Example #16
def main():
    # parse and check command line args
    parser = argparse.ArgumentParser(
        epilog=
        """Description:
           Plays and optionally dumps video from a jpeg topic (a topic that ends with Image.jpg)."""
        , formatter_class=RawTextHelpFormatter
    )
    parser.add_argument("broker", help="The name of the kafka broker.", type=str)
    parser.add_argument("topic", help="The name of topic (*.Image.jpg).", type=str)
    parser.add_argument('-f', "--full_screen", action='store_true')
    parser.add_argument('-d', "--dump", help="if set images are stored in jpg files", action='store_true')
    parser.add_argument('-o', "--offset", type=int, default=-1)
    args = parser.parse_args()
    if not args.topic.endswith(".Image.jpg"):
        raise argparse.ArgumentTypeError('The topic must be a jpeg image topic (should end with .Image.jpg)')

    # handle full screen
    window_name = args.topic
    if args.full_screen:
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # calc start time and create consumer
    c = Consumer({'bootstrap.servers': args.broker, 'group.id': 'display', 'auto.offset.reset': 'latest'})
    c.assign([TopicPartition(topic=args.topic, partition=0, offset=args.offset)])

    # read frames and show (or dump) them
    while True:
        msg = c.poll(1.0)

        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue

        time = msg.timestamp()[1]
        img = decode_image_message(msg)
        if type(img) == np.ndarray:
            if args.dump:
                cv2.imwrite(args.topic + "_" + str(time) + ".jpg", img)
            cv2.imshow(window_name, img)
        k = cv2.waitKey(33)
        if k == 113:  # The 'q' key to stop
            break
        elif k == -1:  # normally -1 returned,so don't print it
            continue
        else:
            print(f"Press 'q' key for EXIT!")
Example #17
 def __init__(self, topicname):
     # ensure_topic(topicname)
     super().__init__({
         "group.id": "pyrandall-pytest",
         "bootstrap.servers": bootstrap_servers
     })
     self.waiting_assignment = True
     self.topic = topicname
     self.topic_partition = TopicPartition(topic=self.topic, partition=0)
     # open connection with kafka and do not rely on latest offset.
     # because latest offset is retrieved on partition assignment
     # that would happen after the messages in our functional tests are produced
     self.goto_largest_offset()
     self.subscribe([topicname], self.on_assign)
    def __on_partition_state_change(self, topic, partition,
                                    previous_state_and_offsets,
                                    current_state_and_offsets):
        """
        Callback that is invoked when a partition state changes.
        """
        logger.debug(
            "State change for %r: %r to %r",
            (topic, partition),
            previous_state_and_offsets,
            current_state_and_offsets,
        )

        current_state, current_offsets = current_state_and_offsets
        if current_offsets.local is None:
            # It only makes sense to manipulate the consumer if we've got an
            # assignment. (This block should only be entered at startup if the
            # remote offsets are retrieved from the commit log before the local
            # consumer has received its assignment.)
            return

        # TODO: This will be called from the commit log consumer thread, so need
        # to verify that calling the ``consumer.{pause,resume}`` methods is
        # thread safe!
        if current_state in (
                SynchronizedPartitionState.UNKNOWN,
                SynchronizedPartitionState.SYNCHRONIZED,
                SynchronizedPartitionState.REMOTE_BEHIND,
        ):
            self.__consumer.pause(
                [TopicPartition(topic, partition, current_offsets.local)])
        elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
            self.__consumer.resume(
                [TopicPartition(topic, partition, current_offsets.local)])
        else:
            raise NotImplementedError(
                f"Unexpected partition state: {current_state}")
def listen_for_messages(msg, consumer):
    """
    Listen for messages on the hccm topic.

    Once a message from one of these topics arrives, we extract the
    payload and process the report files' line items.

    Once all files from the manifest are complete, a celery job is
    dispatched to the worker to complete summary processing for the manifest.

    Several exceptions can occur while listening for messages:
    Database Errors - Re-processing attempts will be made until successful.
    Internal Errors - Re-processing attempts will be made until successful.
    Report Processing Errors - Kafka message will be committed with an error.
                               Errors of this type would require a report processor
                               fix and we do not want to block the message queue.

    Upon successful processing the kafka message is manually committed.  Manual
    commits are used so we can use the message queue to store unprocessed messages
    to make the service more tolerant of SIGTERM events.

    Args:
        consumer - (Consumer): kafka consumer for HCCM ingress topic.

    Returns:
        None

    """
    offset = msg.offset()
    partition = msg.partition()
    topic_partition = TopicPartition(topic=Config.HCCM_TOPIC, partition=partition, offset=offset)
    try:
        LOG.info(f"Processing message offset: {offset} partition: {partition}")
        process_messages(msg)
        LOG.debug(f"COMMITTING: message offset: {offset} partition: {partition}")
        consumer.commit()
    except (InterfaceError, OperationalError, ReportProcessorDBError) as error:
        close_and_set_db_connection()
        LOG.error(f"[listen_for_messages] Database error. Error: {type(error).__name__}: {error}. Retrying...")
        rewind_consumer_to_retry(consumer, topic_partition)
    except (KafkaMsgHandlerError, RabbitOperationalError) as error:
        LOG.error(f"[listen_for_messages] Internal error. {type(error).__name__}: {error}. Retrying...")
        rewind_consumer_to_retry(consumer, topic_partition)
    except ReportProcessorError as error:
        LOG.error(f"[listen_for_messages] Report processing error: {str(error)}")
        LOG.debug(f"COMMITTING: message offset: {offset} partition: {partition}")
        consumer.commit()
    except Exception as error:
        LOG.error(f"[listen_for_messages] UNKNOWN error encountered: {type(error).__name__}: {error}", exc_info=True)
def test_json_record_serialization_custom(kafka_cluster, load_file):
    """
    Ensures to_dict and from_dict hooks are properly applied by the serializer.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(schema_str,
                                      sr,
                                      to_dict=_testProduct_to_dict)
    value_deserializer = JSONDeserializer(schema_str,
                                          from_dict=_testProduct_from_dict)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = _TestProduct(product_id=1,
                          name="The ice sculpture",
                          price=12.50,
                          tags=["cold", "ice"],
                          dimensions={
                              "length": 7.0,
                              "width": 12.0,
                              "height": 9.5
                          },
                          location={
                              "latitude": -78.75,
                              "longitude": 20.4
                          })

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([
        getattr(actual, attribute) == getattr(record, attribute)
        for attribute in vars(record)
    ])
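_testProduct_to_dict and _testProduct_from_dict are not shown in the snippet; hypothetical hooks matching the product.json field names might look like this:

# Hypothetical to_dict/from_dict hooks (assumptions); both receive a SerializationContext.
def _testProduct_to_dict(product, ctx):
    return {"productId": product.product_id,
            "productName": product.name,
            "price": product.price,
            "tags": product.tags,
            "dimensions": product.dimensions,
            "warehouseLocation": product.location}


def _testProduct_from_dict(obj, ctx):
    return _TestProduct(product_id=obj["productId"],
                        name=obj["productName"],
                        price=obj["price"],
                        tags=obj["tags"],
                        dimensions=obj["dimensions"],
                        location=obj["warehouseLocation"])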
    def get_messages(self, timestamp):
        ret = []
        while len(self.queue) > 0 and self.queue[0].message.timestamp()[1] <= timestamp:
            ret.append(self.queue.popleft().message)
        if len(self.queue) < self.min_limit and self.paused and not self.stopped:
            logging.debug('Resume reading on topic: {}'.format(self.topic_name))
            self.paused = False
            self.consumer_ref.resume([
                TopicPartition(topic=self.topic_name, partition=self.partition)
            ])
        self.last_message_ts = timestamp
        return ret
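A standalone sketch of the pause/resume calls the buffer above relies on; broker, group id, topic, and partition are assumptions:

# Standalone sketch of pause/resume on an assigned partition (names are assumptions).
from confluent_kafka import Consumer, TopicPartition

consumer = Consumer({'bootstrap.servers': 'localhost:9092', 'group.id': 'buffered-reader'})
tp = TopicPartition(topic='events', partition=0)
consumer.assign([tp])

consumer.pause([tp])    # stop fetching while the local buffer drains
consumer.resume([tp])   # resume fetching once the buffer falls below min_limit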
Example #22
    def _size(self, queue):
        """Get the number of pending messages in the topic/queue."""
        queue = self.sanitize_queue_name(queue)

        consumer = self._kafka_consumers.get(queue, None)
        if consumer is None:
            return 0

        size = 0
        for assignment in consumer.assignment():
            topic_partition = TopicPartition(queue, assignment.partition)
            (_, end_offset) = consumer.get_watermark_offsets(topic_partition)
            [committed_offset] = consumer.committed([topic_partition])
            size += end_offset - committed_offset.offset
        return size
Example #23
def count_messages(bootstrap_servers):
    c = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'group2',
        'enable.auto.commit': False,
        'auto.offset.reset': 'beginning'
    })

    metadata = c.list_topics()
    topics = metadata.topics
    for topic, topicMetadata in topics.items():
        for partition in topicMetadata.partitions:
            (low,
             high) = c.get_watermark_offsets(TopicPartition(topic, partition))
            print(f"{topic} {partition}: {high}")
Example #24
        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug('Skipping commit of unprocessed partition: %r', (topic, partition))
                    continue

                offsets_to_commit.append(TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s owned partition(s): %r',
                    len(offsets_to_commit),
                    offsets_to_commit)
                commit(offsets_to_commit)
Example #25
    def commitOffsets(self):
        """ Commit consumed offsets if needed """

        # may be asked to commit on rebalance or shutdown but
        # should only commit if the processor has requested.
        if self.commitOffsetNeeded:
            offsetsToCommit = [
                TopicPartition(t, p, o + 1)
                for ((t, p), o) in self.consumedOffsets.items()
            ]
            self.consumer.commit(offsets=offsetsToCommit, asynchronous=False)
            self.consumedOffsets.clear()
            self.commitOffsetNeeded = False

        self.commitRequested = False
Example #26
    def __init__(self, topic=None, ip='localhost'):
        self.topic = topic
        self.ip = ip  # os.environ['KAFKA_SERVER_IP']
        self.base_config = {
            'bootstrap.servers': self.ip + ':9092',
            'schema.registry.url': 'http://' + self.ip + ':8081'
        }

        self.avro_consumer = AvroConsumer(
            dict(self.base_config, **{'group.id': 'groupid'}))

        self.avro_consumer.assign([TopicPartition(self.topic, 0)])
        self.key_schema = avro.load(os.path.join(SCHEMAS, 'keyschema.avsc'))
        self.value_schema = avro.load(
            os.path.join(SCHEMAS, self.topic + '.avsc'))
def test_json_record_serialization(kafka_cluster, load_file):
    """
    Tests basic JSONSerializer and JSONDeserializer functionality.

    product.json from:
        https://json-schema.org/learn/getting-started-step-by-step.html

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(schema_str, sr)
    value_deserializer = JSONDeserializer(schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {
        "productId": 1,
        "productName": "An ice sculpture",
        "price": 12.50,
        "tags": ["cold", "ice"],
        "dimensions": {
            "length": 7.0,
            "width": 12.0,
            "height": 9.5
        },
        "warehouseLocation": {
            "latitude": -78.75,
            "longitude": 20.4
        }
    }

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([actual[k] == v for k, v in record.items()])
def get_kafka_old_offset(topic, kafka_broker, partition_count):

    kafka_old_offset = {}
    #kafka_new_offset = {}

    try:
        #Get kafka offset through kafka module
        '''
        from kafka import SimpleClient
        from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
        from kafka.common import OffsetRequestPayload
        client = SimpleClient(broker_list)
        partitions = client.topic_partitions[topic]
        offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]
        offsets_responses = client.send_offset_request(offset_requests)
        for r in offsets_responses:
            #print("partition = %s, offset = %s"%(r.partition, r.offsets[0]))
            kafka_old_offset[r.partition] = r.offsets[0]
        '''
        # Get kafka offset through confluent_kafka module
        from confluent_kafka import TopicPartition, Consumer, KafkaException
        from confluent_kafka.admin import AdminClient

        conf = {'bootstrap.servers': kafka_broker, 'session.timeout.ms': 6000}
        admin_client = AdminClient(conf)
        consumer_client = Consumer(conf)

        md = admin_client.list_topics(timeout=10)
        for t in iter(md.topics.values()):
            if str(t) == topic:
                for p in iter(t.partitions.values()):
                    td = TopicPartition(str(t), p.id)
                    oldest_offset, newest_offset = consumer_client.get_watermark_offsets(
                        td)
                    kafka_old_offset[p.id] = oldest_offset
                    #kafka_new_offset[p.id] = newest_offset
    except ImportError:
        for partition_id in range(partition_count):
            command = 'kafka-run-class kafka.tools.GetOffsetShell --topic {} --broker-list {} --time -2 --partition {}'.format(
                topic, kafka_broker, partition_id)
            #args = shlex.split(command)
            #process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            #output = '{}'.format(process.stdout.read().decode(encoding='UTF-8'))
            output = utils.shell_wrapper.check_output(command)
            offset = output.split(':')[2]
            kafka_old_offset[partition_id] = int(offset)

    return kafka_old_offset
def test_offsets_for_times():
    c = Consumer({
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100
    })
    # Query broker for timestamps for partition
    try:
        test_topic_partition = TopicPartition("test", 0, 100)
        c.offsets_for_times([test_topic_partition], timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])
    c.close()
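Beyond the error-path test above, offsets_for_times() is typically used to seek to a point in time; a sketch with the broker, topic, and group id as assumptions:

# Sketch: position a consumer at the first message at or after a timestamp (names are assumptions).
import time
from confluent_kafka import Consumer, TopicPartition

c = Consumer({'bootstrap.servers': 'localhost:9092', 'group.id': 'ts-seek'})
one_hour_ago_ms = int((time.time() - 3600) * 1000)
# For offsets_for_times() the offset field carries the target timestamp in milliseconds.
tps = c.offsets_for_times([TopicPartition('test', 0, one_hour_ago_ms)], timeout=10.0)
# Returned offsets are the earliest ones with timestamp >= target (or -1 if none); assign to seek there.
c.assign(tps)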
Example #30
def get_last_available_status_message(cons: Consumer, status_topic: str):
    """

    :param cons:
    :param status_topic:
    :return: The last status message.
    """
    partitions = cons.assignment()
    _, hi = cons.get_watermark_offsets(partitions[0],
                                       cached=False,
                                       timeout=2.0)
    last_msg_offset = hi - 1
    cons.assign(
        [TopicPartition(status_topic, partition=0, offset=last_msg_offset)])
    status_msg, _ = poll_for_valid_message(cons, expected_file_identifier=None)
    return status_msg
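A hypothetical call site; the helper requires an existing assignment (otherwise cons.assignment() is empty and partitions[0] raises IndexError), and poll_for_valid_message is a project-specific test utility:

# Hypothetical usage; the topic name is an assumption.
cons.assign([TopicPartition("status_topic", partition=0)])
last_status = get_last_available_status_message(cons, "status_topic")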