def open_consumer(stream_host_and_port_list, topic_name, group_name):
    consumer = Consumer({'bootstrap.servers': stream_host_and_port_list,  # kafka broker
                         'group.id': group_name,  # consumer group
                         'api.version.request': 'true'
                         })
    consumer.subscribe([topic_name])
    return consumer
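A minimal usage sketch (not part of the original snippet), assuming a broker at localhost:9092 and a topic named 'my-topic'; the broker address, topic, and group name are placeholders:

consumer = open_consumer('localhost:9092', 'my-topic', 'my-group')  # hypothetical values
try:
    while True:
        msg = consumer.poll(1.0)      # block for up to one second
        if msg is None:
            continue                  # no message arrived within the timeout
        if msg.error():
            print('Consumer error: {}'.format(msg.error()))
            continue
        print('Received: {}'.format(msg.value()))
finally:
    consumer.close()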
Example #2
class KafkaWorkflowResultsReceiver(object):
    _requires = ['confluent-kafka']

    def __init__(self, message_converter=ProtobufWorkflowResultsConverter, current_app=None):
        import walkoff.server.workflowresults  # Need this import

        self.thread_exit = False

        kafka_config = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_CONFIG
        self.receiver = Consumer(kafka_config)
        self.topic = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_TOPIC
        self.message_converter = message_converter
        self.workflows_executed = 0

        if current_app is None:
            self.current_app = Flask(__name__)
            self.current_app.config.from_object(walkoff.config.Config)
            self.current_app.running_context = context.Context(init_all=False)
        else:
            self.current_app = current_app

    def receive_results(self):
        """Constantly receives data from the Kafka Consumer and handles it accordingly"""
        logger.info('Starting Kafka workflow results receiver')
        self.receiver.subscribe(['{}.*'.format(self.topic)])
        while not self.thread_exit:
            raw_message = self.receiver.poll(1.0)
            if raw_message is None:
                gevent.sleep(0.1)
                continue
            if raw_message.error():
                if raw_message.error().code() == KafkaError._PARTITION_EOF:
                    gevent.sleep(0.1)
                    continue
                else:
                    logger.error('Received an error in Kafka receiver: {}'.format(raw_message.error()))
                    gevent.sleep(0.1)
                    continue
            with self.current_app.app_context():
                self._send_callback(raw_message.value())
        self.receiver.close()
        return

    def _send_callback(self, message_bytes):
        event, sender, data = self.message_converter.to_event_callback(message_bytes)

        if sender is not None and event is not None:
            with self.current_app.app_context():
                event.send(sender, data=data)
            if event in [WalkoffEvent.WorkflowShutdown, WalkoffEvent.WorkflowAborted]:
                self._increment_execution_count()

    def _increment_execution_count(self):
        self.workflows_executed += 1
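A hedged sketch of how this receiver might be driven from a gevent greenlet; it assumes walkoff.config.Config has already been loaded, and the 60-second run time is an arbitrary placeholder:

import gevent

receiver = KafkaWorkflowResultsReceiver()
greenlet = gevent.spawn(receiver.receive_results)   # receive_results() loops until thread_exit is set
try:
    gevent.sleep(60)                                # let the receiver run for a while (placeholder duration)
finally:
    receiver.thread_exit = True                     # the loop notices the flag and closes the consumer
    greenlet.join()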
def test_offsets_for_times():
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})
    # Query broker for timestamps for partition
    try:
        test_topic_partition = TopicPartition("test", 0, 100)
        c.offsets_for_times([test_topic_partition], timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])
    c.close()
class KafkaWorkflowCommunicationReceiver(object):
    """Receives communication via Kafka and sends it to the executing workflow"""
    _requires = ['confluent-kafka']

    def __init__(self, message_converter=ProtobufWorkflowCommunicationConverter):
        self._ready = False

        kafka_config = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_CONFIG
        self.receiver = Consumer(kafka_config)
        self.topic = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_TOPIC
        self.message_converter = message_converter
        self.exit = False

        if self.check_status():
            self._ready = True

    def shutdown(self):
        self.exit = True
        self.receiver.close()

    def receive_communications(self):
        """Constantly receives data from the Kafka and handles it accordingly"""
        logger.info('Starting workflow communication receiver')
        while not self.exit:
            raw_message = self.receiver.poll(1.0)
            if raw_message is None:
                continue
            if raw_message.error():
                if raw_message.error().code() == KafkaError._PARTITION_EOF:
                    continue
                else:
                    logger.error('Received an error in Kafka receiver: {}'.format(raw_message.error()))
                    continue

            message = self.message_converter.to_received_message(raw_message.value())
            if message is not None:
                yield message
            else:
                break

        return  # PEP 479: a generator must return instead of raising StopIteration

    def is_ready(self):
        return self._ready

    def check_status(self):
        if self.receiver is not None:
            return True
        return False
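A minimal consumption sketch, assuming the WALKOFF Kafka configuration is in place; handle_message is a hypothetical callback, not part of the original code:

receiver = KafkaWorkflowCommunicationReceiver()
if receiver.is_ready():
    for message in receiver.receive_communications():   # generator yields converted messages
        handle_message(message)                          # hypothetical handler
receiver.shutdown()                                      # sets the exit flag and closes the consumer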
Example #5
    def __init__(self, bootstrap_servers, consumer_group, commit_log_topic,
                 synchronize_commit_group, initial_offset_reset='latest', on_commit=None):
        self.bootstrap_servers = bootstrap_servers
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[initial_offset_reset]

        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        self.__commit_log_consumer, self.__commit_log_consumer_stop_request = self.__start_commit_log_consumer()

        self.__positions = {}

        def commit_callback(error, partitions):
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = {
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.consumer_group,
            'enable.auto.commit': 'false',
            'enable.auto.offset.store': 'true',
            'enable.partition.eof': 'false',
            'default.topic.config': {
                'auto.offset.reset': 'error',
            },
            'on_commit': commit_callback,
        }

        self.__consumer = Consumer(consumer_configuration)
def test_multiple_close_throw_exception():
    """ Calling Consumer.close() multiple times should throw Runtime Exception
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.close()
    assert 'Consumer already closed' == str(ex.value)
Example #8
def analytics_internet3_logs():
    consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Internet3_logs_%s' %dt,
                         'default.topic.config': {'auto.offset.reset': 'latest', 'auto.commit.enable': 'true'}})
    consumer.subscribe(['haproxy_logs'])
    try:
        while True:
            msg = consumer.poll()
            if not msg.error():
                Msg = msg.value().decode('utf-8').strip()
                try:
                    tm = time.strftime('%Y%m%d%H%M', time.localtime())
                    if Msg:
                        Msg = Msg.split()
                        if len(Msg) >= 17:
                            internet_access_minute = 'internet_access_minute_%s' % tm
                            RC.incr(internet_access_minute)
                            RC.expire(internet_access_minute,3600)
                except Exception as e:
                    logging.error(e)
                    continue
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.error(msg.error())
                continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()
def test_store_offsets():
    """ Basic store_offsets() tests """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    try:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._UNKNOWN_PARTITION

    c.unsubscribe()
    c.close()
    def __init__(self, conf):
        """
        conf is a config dict passed to confluent_kafka.Consumer()
        """
        super(VerifiableConsumer, self).__init__(conf)
        self.conf['on_commit'] = self.on_commit
        self.consumer = Consumer(**conf)
        self.consumed_msgs = 0
        self.consumed_msgs_last_reported = 0
        self.consumed_msgs_at_last_commit = 0
        self.use_auto_commit = False
        self.use_async_commit = False
        self.max_msgs = -1
        self.assignment = []
        self.assignment_dict = dict()
def subscribe():
    c = Consumer({'bootstrap.servers': '0', 'group.id': 'test-consumer-group',
                  'default.topic.config': {'auto.offset.reset': 'smallest'}})
    c.subscribe(['neuronraindata'])
    while True:
        msg = c.poll()
        if not msg.error() and msg.value():
            # msg.value() is bytes; decode (not encode) it for printing
            print('Received message: ', msg.value().decode("utf-8"))
        else:
            print(msg.error())
    c.close()
def consume():

    c = Consumer({'bootstrap.servers': KAFKA_SERVER, 'group.id': 'mygroup',
              'default.topic.config': {'auto.offset.reset': 'smallest'}})
    c.subscribe([KAFKA_TOPIC])
    while True:
        msg = c.poll()
        if not msg.error():
            print('Received message: %s' % msg.value().decode('utf-8'))
    c.close()
def test_on_commit():
    """ Verify that on_commit is only called once per commit() (issue #71) """

    class CommitState(object):
        def __init__(self, topic, partition):
            self.topic = topic
            self.partition = partition
            self.once = True

    def commit_cb(cs, err, ps):
        print('on_commit: err %s, partitions %s' % (err, ps))
        assert cs.once is True
        assert err == KafkaError._NO_OFFSET
        assert len(ps) == 1
        p = ps[0]
        assert p.topic == cs.topic
        assert p.partition == cs.partition
        cs.once = False

    cs = CommitState('test', 2)

    c = Consumer({'group.id': 'x',
                  'enable.auto.commit': False, 'socket.timeout.ms': 50,
                  'session.timeout.ms': 100,
                  'on_commit': lambda err, ps: commit_cb(cs, err, ps)})

    c.assign([TopicPartition(cs.topic, cs.partition)])

    for i in range(1, 3):
        c.poll(0.1)

        if cs.once:
            # Try commit once
            try:
                c.commit(asynchronous=False)
            except KafkaException as e:
                print('commit failed with %s (expected)' % e)
                assert e.args[0].code() == KafkaError._NO_OFFSET

    c.close()
    def connect(self):
        self.consumer = Consumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        })
        print("subscribing to %s" % self.consumer_topic)
        self.consumer.subscribe([
            self.consumer_topic
        ])
        print("Subscribed to topic %s " % self.consumer_topic)

        self.producer = Producer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id
        })
Example #16
async def consume_events(topic, group, brokers, callback, schema=None, registry=None, delay=0.01, **kwargs):
    """
    Connect to the Kafka endpoint and start consuming
    messages from the given `topic`.
    The given callback is applied on each
    message.
    """
    global consumer
    if topic in consumers:
        raise RuntimeError("A consumer already exists for topic: %s" % topic)

    if (not registry_serializer or not registry_client) and registry:
        r_client, serializer = create_registry_client(registry)

    consumer = Consumer({'bootstrap.servers': brokers, 'group.id': group,
                         'default.topic.config': {'auto.offset.reset': 'largest'}})
    consumer.subscribe([topic])
    consumers[topic] = consumer

    try:
        while True:
            message = consumer.poll(1)
            if message:
                if not message.error():
                    if registry:
                        message = serializer.decode_message(message.value())
                    else:
                        message = message.value()

                    await callback(message)
                    consumer.commit()
            else:
                await asyncio.sleep(delay)
    except KafkaException as ex:
        pass
    else:
        consumer.close()
    finally:
        consumers.pop(topic, None)
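One possible way to run the coroutine above; the topic, group, broker address, and callback are placeholders:

import asyncio

async def print_message(message):
    print('received:', message)

asyncio.run(consume_events('my-topic', 'my-group', 'localhost:9092', print_message))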
Example #17
def analytics_intranet_logs():
    consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Intranet_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}})
    consumer.subscribe(['haproxy2_logs'])
    try:
        while True:
            msg = consumer.poll()
            if not msg.error():
                Msg = msg.value().decode('utf-8').strip()
                try:
                    tt = time.strftime('%Y%m%d', time.localtime())
                    th = time.strftime('%Y%m%d%H', time.localtime())
                    tm = time.strftime('%Y%m%d%H%M', time.localtime())
                    H_key = 'haproxy2_topic_%s' % tt
                    top2_url_hour = 'top2_url_hour_%s' % th
                    top2_url_minute = 'top2_url_minute_%s' % tm
                    if len(Msg.split()) >= 17:
                        val = Msg.split('{')
                        if len(val) >= 2:
                            Topic = val[1].split('}')[0]
                            Rtime = val[0].split()[8]
                            Rtime = int(Rtime.split('/')[4])
                            if ':' in Topic:
                                Topic = str(Topic.split(':')[0])
                            if '|' in Topic:
                                Topic = str(Topic.split('|')[0])
                            if '.baihe.com' in Topic:
                                Key = 'haproxy2_logs_%s_%s' % (tt, Topic)
                                Rt_Key = 'Rtime2_%s_%s' % (tt, Topic)
                                # API endpoint
                                PATH = str(Msg.split()[17]).split('?')[0]
                                URL = 'http://%s%s' % (Topic,PATH)
                                RC.zincrby(top2_url_hour, URL, 1)
                                RC.zincrby(top2_url_minute, URL, 1)
                                for KEY in (H_key, Key, Rt_Key,top2_url_hour,top2_url_minute):
                                    RC.expire(KEY,3600)
                                RC.sadd(H_key, Topic)
                                RC.incr(Key)
                                if Rtime:
                                    RC.lpush(Rt_Key, Rtime)
                except Exception as e:
                    logging.error(e)
                    continue
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.error(msg.error())
                continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()
Example #18
def analytics_internet_logs():
    consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Internet_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}})
    consumer.subscribe(['haproxy_logs'])
    try:
        while True:
            msg = consumer.poll()
            if not msg.error():
                Msg = msg.value().decode('utf-8').strip()
                try:
                    tt = time.strftime('%Y%m%d', time.localtime())
                    th = time.strftime('%Y%m%d%H', time.localtime())
                    pv_key = 'baihe_pv_%s' % tt
                    if Msg:
                        Msg = Msg.split()
                        RC.incr(pv_key)
                        if len(Msg) >= 17:
                            Topic = str(Msg[14]).split('|')[0].replace('{', '').strip()
                            IP = str(Msg[5])
                            H_key = 'haproxy_topic_%s' % tt
                            top_ip = 'top_ip_%s' % tt
                            top_ip_hour = 'top_ip_%s' % th
                            top_url_hour = 'top_url_%s' % th
                            PATH = str(Msg[16]).split('?')[0]
                            URL = 'http://%s%s' % (Topic,PATH)
                            Ha_Key = 'haproxy_logs_%s_%s' % (tt, Topic)
                            top_ip_domain = 'top_%s_domain_%s' % (IP, tt)
                            top_ip_domain_hour = 'top_%s_domain_%s' % (IP, th)
                            for KEY in (H_key, pv_key, top_ip, top_url_hour, top_ip_hour,Ha_Key, top_ip_domain, top_ip_domain_hour):
                                RC.expire(KEY,3600)
                            RC.sadd(H_key, Topic)
                            RC.incr(Ha_Key)
                            # ip
                            RC.zincrby(top_ip, IP, 1)
                            RC.zincrby(top_ip_hour, IP, 1)
                            # IP + API endpoint
                            RC.zincrby(top_ip_domain, URL, 1)
                            RC.zincrby(top_ip_domain_hour, URL, 1)
                            # API endpoint
                            RC.zincrby(top_url_hour, URL, 1)
                except Exception:
                    continue
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.error(msg.error())
                continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()
Example #19
def WAF_logs():
    consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Waf_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}})
    consumer.subscribe(['haproxy_logs'])
    try:
        while True:
            msg = consumer.poll()
            if not msg.error():
                Msg = msg.value().decode('utf-8').strip()
                try:
                    tm = time.strftime('%Y%m%d%H%M',time.localtime())
                    if Msg:
                        Msg = Msg.split()
                        if len(Msg) >= 17:
                            url_code = Msg[9]
                            Topic =str(Msg[14]).split('|')[0].replace('{','').strip()
                            IP = str(Msg[5])
                            if url_code in ('200', '206', '301', '302', '304', '404'):
                                top_ip_minute = 'top_ip_%s' % tm
                                top_url_minute = 'top_url_%s' % tm
                                PATH = str(Msg[16]).split('?')[0]
                                URL = 'http://%s%s' % (Topic,PATH)
                                top_ip_domain_minute = 'top_%s_domain_%s' % (IP, tm)
                                top_url_ip_minute = 'top_%s_ip_%s' % (URL, tm)
                                # ip
                                RC.zincrby(top_ip_minute, IP, 1)
                                RC.expire(top_ip_minute, 300)
                                # IP + API endpoint
                                RC.zincrby(top_ip_domain_minute, URL, 1)
                                RC.expire(top_ip_domain_minute, 300)
                                # API endpoint
                                RC.zincrby(top_url_minute, URL, 1)
                                RC.expire(top_url_minute, 300)
                                # API endpoint + IP
                                RC.zincrby(top_url_ip_minute, IP, 1)
                                RC.expire(top_url_ip_minute, 300)
                except Exception as e:
                    logging.error(e)
                    continue
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.error(msg.error())
                continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()
    # corresponding to register / deregister event
    insertRegisterDeregisterMsg(msg)


settings = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'mygroup',
    'client.id': 'client-1',
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    }
}

c = Consumer(settings)
c.subscribe([vehicleConstants.KAFKA_TOPIC_VEHICLE_REGISTER_DEREGISTER])

try:
    while True:
        msg = c.poll(0.1)
        if msg is None:
            continue
        elif not msg.error():
            print('Received message: {0}'.format(msg.value()))
        elif msg.error().code() == KafkaError._PARTITION_EOF:
            print('End of partition reached {0}/{1}'.format(
                msg.topic(), msg.partition()))
        else:
            print('Error occurred: {0}'.format(msg.error().str()))
        print(msg)
Example #21
    def ConsumeMessages(self):
        c = Consumer({
            'bootstrap.servers': 'localhost:9092',
            'group.id': 'mygroup2',
            'auto.offset.reset': 'largest'
        })

        c.subscribe(['test'])
        self.counter = 0
        if self.counter == 0:
            self.now = math.floor(time.time() - (time.time() % 10) + 1)
            self.end = self.now + 5
            self.counter += 1

        while True:
            msg = c.poll(0.1)
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            self.data = literal_eval(msg.value().decode('utf-8'))

            if math.floor(self.data['timestamp']) < self.now:
                pass

            if (math.floor(self.data['timestamp']) >= self.now) and (
                    math.floor(self.data['timestamp']) < self.end):
                self.dict_ttl.append_values(self.data['sensor'],
                                            self.data["data"])

            if math.floor(self.data['timestamp']) >= self.end:
                print('\nLength of dict is {}'.format(len(list(
                    self.dict_ttl))))
                if len(list(self.dict_ttl)) == 0:
                    self.now = self.now + 5
                    self.end = self.end + 5
                    continue
                for k, v in self.dict_ttl.ttl_items():
                    self.df = self.df.append(
                        {
                            'timestamp': self.now,
                            'data': v[0][1],
                            'sensor': k
                        },
                        ignore_index=True)
                if self.df.empty:
                    print('empty df')
                    continue

                print('sum of items is {}'.format(
                    str(self.df['data'].apply(lambda x: sum(x)).tolist()[0])))
                self.total = self.df['data'].apply(
                    lambda x: sum(x)).tolist()[0]
                if int(self.total) < 7000:
                    print('This row would be discarded')
                self.df['data'] = self.df['data'].apply(
                    lambda x: sum(x) / len(x))

                self.df = self.df.groupby(['timestamp', 'sensor'
                                           ]).agg('mean').reset_index()
                print(
                    "Mean of all items for this sensor within window {},{} is "
                    .format(self.now, self.end))
                print(self.df)
                print('\n---------------------------')
                self.df = pd.DataFrame()
                self.dict_ttl = DefaultDictTTL(5)
                self.now = self.now + 5
                self.end = self.end + 5

        c.close()
Example #22
class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": ",".join(["PLAINTEXT://localhost:9092"]),
            "group.id": f"{topic_name_pattern}",
            "default.topic.config": {
                "auto.offset.reset": "earliest"
            },
        }

        # TODO: Create the Consumer, using the appropriate type.
        if is_avro is True:
            # TODO: Make sure to set schema registry
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        #
        #
        # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about
        # how the `on_assign` callback should be invoked.
        #
        #
        # self.consumer.subscribe( TODO )
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # TODO: If the topic is configured to use `offset_earliest` set the partition offset to
        # the beginning or earliest
        for partition in partitions:
            if self.offset_earliest is True:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for {self.topic_name_pattern}")
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        #
        #
        # TODO: Poll Kafka for messages. Make sure to handle any errors or exceptions.
        # Additionally, make sure you return 1 when a message is processed, and 0 when no message
        # is retrieved.
        #
        #
        logger.debug(f"consuming from topic pattern {self.topic_name_pattern}")
        try:
            message = self.consumer.poll(timeout=self.consume_timeout)
        except SerializerError as e:
            logger.error(
                f"failed to deserialize message {self.topic_name_pattern}: {e}"
            )
            return 0

        if message is None:
            logger.debug("no messages to be consumed")
            return 0
        elif message.error() is not None:
            logger.error(
                f"failed to consume message {self.topic_name_pattern}: {message.error()}"
            )
            return 0

        logger.debug(f"message received: ({message.key()}) {message.value()}")
        self.message_handler(message)
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        #
        #
        # TODO: Cleanup the kafka consumer
        #
        #
        logger.debug("closing consumer...")
        self.consumer.close()
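A hypothetical driver for this class; since consume() awaits tornado's gen.sleep(), it assumes a Tornado IOLoop, and the topic pattern and handler are placeholders:

from tornado import ioloop

def print_message(message):
    print('{}: {}'.format(message.key(), message.value()))

consumer = KafkaConsumer('com.example.purchases', print_message, is_avro=False)  # placeholder topic pattern
try:
    ioloop.IOLoop.current().run_sync(consumer.consume)   # consume() loops forever, polling in batches
except KeyboardInterrupt:
    consumer.close()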
Example #23
class ConsumerServer:
    def __init__(
        self,
        topic_name_pattern=TOPIC,
        broker_urls=BROADCAST_URL,
        message_handler=handle_json_message,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest
        self.broker_properties = {
            'bootstrap.servers': broker_urls,
            'group.id': '0'
        }

        self.consumer = Consumer(self.broker_properties)
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        for partition in partitions:
            if self.offset_earliest:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            time.sleep(1)
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        message = self.consumer.poll(timeout=self.consume_timeout)
        if message is None:
            logger.info("no message received for pattern %s",
                        self.topic_name_pattern)
            return 0
        elif message.error():
            logger.error("error - failed to consume data")
            return 0
        else:
            self.message_handler(message)
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        logger.info("Shutdown consumer")
        self.consumer.close()
Example #24
class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        #self.broker_properties = {
        #    #
        #    # TODO
        #    #
        #    "kafka" : "PLAINTEXT://localhost:9092",
        #    "schema_registry" : "http://localhost:8081"
        #}
        self.broker_properties = {
            "bootstrap.servers": "localhost:9092",
            #"bootstrap.servers": "PLAINTEXT://localhost:9092",
            "group.id": "udacity",
            "auto.offset.reset": "earliest" if offset_earliest else "latest"
        }

        # TODO: Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
            #self.consumer = AvroConsumer(
            #    {
            #        "bootstrap.servers": self.broker_properties["kafka"],
            #        "schema.registry.url": self.broker_properties["schema_registry"],
            #        "group.id": "0",
            #        "auto.offset.reset": "earliest"
            #        self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            #
            #    }
            #)
            logger.info("__init__ - AvroConsumer was created")
        else:
            self.consumer = Consumer(self.broker_properties)
            #self.consumer = Consumer(
            #    {
            #        "bootstrap.servers": self.broker_properties["kafka"],
            #        "group.id": "0",
            #        "auto.offset.reset": "earliest"
            #    }
            #)
            #pass
            logger.info("__init__ - Consumer was created")

        #
        #
        # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about
        # how the `on_assign` callback should be invoked.
        #
        #
        logger.info("Consumer will subscribe - %s", self.topic_name_pattern)
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # TODO: If the topic is configured to use `offset_earliest` set the partition offset to
        # the beginning or earliest
        #logger.info("on_assign is incomplete - skipping")
        logger.info("on_assign - self.topic_name_pattern: %s",
                    self.topic_name_pattern)
        logger.info("on_assign - partitions: %s", partitions)
        logger.info("on_assign - self.consumer: %s", self.consumer)
        #for partition in partitions:
        #    pass
        #    #
        #    #
        #    # TODO
        #    #
        #    #

        for partition in partitions:
            logger.info("on_assign - partition: %s", partition)
            partition.offset = OFFSET_BEGINNING

        logger.info("BEFORE partitions assigned for %s",
                    self.topic_name_pattern)
        consumer.assign(partitions)
        logger.info("AFTER partitions assigned for %s",
                    self.topic_name_pattern)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        #
        #
        # TODO: Poll Kafka for messages. Make sure to handle any errors or exceptions.
        # Additionally, make sure you return 1 when a message is processed, and 0 when no message
        # is retrieved.
        #
        #
        #logger.info("_consume is incomplete - skipping")
        #return 0
        message = self.consumer.poll(1.0)
        if message is None:
            logger.info("no message received by consumer: %s",
                        self.topic_name_pattern)
            #logger.info("no message received by consumer")
            return 0
        elif message.error() is not None:
            logger.info(f"error from consumer {message.error()}")
            return 0
        else:
            logger.info(f"consumed message {message.key()}: {message.value()}")
            self.message_handler(message)
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        #
        #
        # TODO: Cleanup the kafka consumer
        #
        #
        self.consumer.close()
Example #25
class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": ",".join(["PLAINTEXT://localhost:9092"]),
            "group.id": f"{topic_name_pattern}",
            "default.topic.config": {
                "auto.offset.reset": "earliest"
            },
        }

        # TODO: Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        for partition in partitions:
            if self.offset_earliest is True:
                partition.offset = confluent_kafka.OFFSET_BEGINNING
        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        try:
            message = self.consumer.poll(timeout=self.consume_timeout)
        except SerializerError as e:
            return 0

        if message is None:
            return 0
        elif message.error() is not None:
            return 0

        self.message_handler(message)
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        logger.debug("Cleaning up the kafka consumer")
        self.consumer.close()
    def _connect(self):
        connection = {'bootstrap.servers': self.host + ":" + self.port, 'group.id': self.group,
                      'session.timeout.ms': 6000,
                      'default.topic.config': {'auto.offset.reset': 'largest'}}
        logging.info("Connecting to Kafka at %s...", connection)
        self.consumer = Consumer(**connection)
        self.consumer.subscribe(self.topic, on_assign=self.on_assign)
Example #27
class QuerySubscriptionConsumer:
    """
    A Kafka consumer that processes query subscription update messages. Each message has
    a related subscription id and the latest values related to the subscribed query.
    These values are passed along to a callback associated with the subscription.
    """

    topic_to_dataset: Dict[str, QueryDatasets] = {
        settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS:
        QueryDatasets.EVENTS,
        settings.KAFKA_TRANSACTIONS_SUBSCRIPTIONS_RESULTS:
        QueryDatasets.TRANSACTIONS,
    }

    def __init__(
        self,
        group_id: str,
        topic: Optional[str] = None,
        commit_batch_size: int = 100,
        initial_offset_reset: str = "earliest",
        force_offset_reset: Optional[str] = None,
    ):
        self.group_id = group_id
        if not topic:
            # TODO(typing): Need a way to get the actual value of settings to avoid this
            topic = cast(str, settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS)

        self.topic = topic
        cluster_name: str = settings.KAFKA_TOPICS[topic]["cluster"]
        self.commit_batch_size = commit_batch_size
        self.initial_offset_reset = initial_offset_reset
        self.offsets: Dict[int, Optional[int]] = {}
        self.consumer: Consumer = None
        self.cluster_options = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            {
                "group.id": self.group_id,
                "session.timeout.ms": 6000,
                "auto.offset.reset": self.initial_offset_reset,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "false",
                "default.topic.config": {
                    "auto.offset.reset": self.initial_offset_reset
                },
            },
        )
        self.admin_cluster_options = kafka_config.get_kafka_admin_cluster_options(
            cluster_name, {"allow.auto.create.topics": "true"})
        self.resolve_partition_force_offset = self.offset_reset_name_to_func(
            force_offset_reset)
        self.__shutdown_requested = False

    def offset_reset_name_to_func(
        self, offset_reset: Optional[str]
    ) -> Optional[Callable[[TopicPartition], TopicPartition]]:
        if offset_reset in {"smallest", "earliest", "beginning"}:
            return self.resolve_partition_offset_earliest
        elif offset_reset in {"largest", "latest", "end"}:
            return self.resolve_partition_offset_latest
        return None

    def resolve_partition_offset_earliest(
            self, partition: TopicPartition) -> TopicPartition:
        low, high = self.consumer.get_watermark_offsets(partition)
        return TopicPartition(partition.topic, partition.partition, low)

    def resolve_partition_offset_latest(
            self, partition: TopicPartition) -> TopicPartition:
        low, high = self.consumer.get_watermark_offsets(partition)
        return TopicPartition(partition.topic, partition.partition, high)

    def run(self) -> None:
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        def on_assign(consumer: Consumer,
                      partitions: List[TopicPartition]) -> None:
            updated_partitions: List[TopicPartition] = []
            for partition in partitions:
                if self.resolve_partition_force_offset:
                    partition = self.resolve_partition_force_offset(partition)
                    updated_partitions.append(partition)

                if partition.offset == OFFSET_INVALID:
                    updated_offset = None
                else:
                    updated_offset = partition.offset
                self.offsets[partition.partition] = updated_offset
            if updated_partitions:
                self.consumer.assign(updated_partitions)
            logger.info(
                "query-subscription-consumer.on_assign",
                extra={
                    "offsets": str(self.offsets),
                    "partitions": str(partitions),
                },
            )

        def on_revoke(consumer: Consumer,
                      partitions: List[TopicPartition]) -> None:
            partition_numbers = [
                partition.partition for partition in partitions
            ]
            self.commit_offsets(partition_numbers)
            for partition_number in partition_numbers:
                self.offsets.pop(partition_number, None)
            logger.info(
                "query-subscription-consumer.on_revoke",
                extra={
                    "offsets": str(self.offsets),
                    "partitions": str(partitions),
                },
            )

        self.consumer = Consumer(self.cluster_options)
        self.__shutdown_requested = False

        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            admin_client = AdminClient(self.admin_cluster_options)
            wait_for_topics(admin_client, [self.topic])

        self.consumer.subscribe([self.topic],
                                on_assign=on_assign,
                                on_revoke=on_revoke)

        i = 0
        while not self.__shutdown_requested:
            message = self.consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise KafkaException(error)

            i = i + 1

            with sentry_sdk.start_transaction(
                    op="handle_message",
                    name="query_subscription_consumer_process_message",
                    sampled=random() <=
                    options.get("subscriptions-query.sample-rate"),
            ), metrics.timer("snuba_query_subscriber.handle_message"):
                self.handle_message(message)

            # Track latest completed message here, for use in `shutdown` handler.
            self.offsets[message.partition()] = message.offset() + 1

            if i % self.commit_batch_size == 0:
                logger.debug("Committing offsets")
                self.commit_offsets()

        logger.debug("Committing offsets and closing consumer")
        self.commit_offsets()
        self.consumer.close()

    def commit_offsets(self,
                       partitions: Optional[Iterable[int]] = None) -> None:
        logger.info(
            "query-subscription-consumer.commit_offsets",
            extra={
                "offsets": str(self.offsets),
                "partitions": str(partitions)
            },
        )

        if self.offsets and self.consumer:
            if partitions is None:
                partitions = self.offsets.keys()
            to_commit = []
            for partition in partitions:
                offset = self.offsets.get(partition)
                if offset is None:
                    # Skip partitions that have no offset
                    continue
                to_commit.append(TopicPartition(self.topic, partition, offset))

            self.consumer.commit(offsets=to_commit)

    def shutdown(self) -> None:
        self.__shutdown_requested = True

    def handle_message(self, message: Message) -> None:
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        with sentry_sdk.push_scope() as scope:
            try:
                with metrics.timer(
                        "snuba_query_subscriber.parse_message_value"):
                    contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                with metrics.timer(
                        "snuba_query_subscriber.fetch_subscription"):
                    subscription: QuerySubscription = QuerySubscription.objects.get_from_cache(
                        subscription_id=contents["subscription_id"])
                    if subscription.status != QuerySubscription.Status.ACTIVE.value:
                        metrics.incr(
                            "snuba_query_subscriber.subscription_inactive")
                        return
            except QuerySubscription.DoesNotExist:
                metrics.incr(
                    "snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                try:
                    _delete_from_snuba(self.topic_to_dataset[message.topic()],
                                       contents["subscription_id"])
                except Exception:
                    logger.exception(
                        "Failed to delete unused subscription from snuba.")
                return

            if subscription.type not in subscriber_registry:
                metrics.incr(
                    "snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            sentry_sdk.set_tag("project_id", subscription.project_id)
            sentry_sdk.set_tag("query_subscription_id",
                               contents["subscription_id"])

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(
                    op="process_message") as span, metrics.timer(
                        "snuba_query_subscriber.callback.duration",
                        instance=subscription.type):
                span.set_data("payload", contents)
                span.set_data("subscription_dataset",
                              subscription.snuba_query.dataset)
                span.set_data("subscription_query",
                              subscription.snuba_query.query)
                span.set_data("subscription_aggregation",
                              subscription.snuba_query.aggregate)
                span.set_data("subscription_time_window",
                              subscription.snuba_query.time_window)
                span.set_data("subscription_resolution",
                              subscription.snuba_query.resolution)
                span.set_data("message_offset", message.offset())
                span.set_data("message_partition", message.partition())
                span.set_data("message_value", message.value())

                callback(contents, subscription)

    def parse_message_value(self, value: str) -> Dict[str, Any]:
        """
        Parses the value received via the Kafka consumer and verifies that it
        matches the expected schema.
        :param value: A json formatted string
        :return: A dict with the parsed message
        """
        with metrics.timer(
                "snuba_query_subscriber.parse_message_value.json_parse"):
            wrapper: Dict[str, Any] = json.loads(value)

        with metrics.timer(
                "snuba_query_subscriber.parse_message_value.json_validate_wrapper"
        ):
            try:
                jsonschema.validate(wrapper, SUBSCRIPTION_WRAPPER_SCHEMA)
            except jsonschema.ValidationError:
                metrics.incr("snuba_query_subscriber.message_wrapper_invalid")
                raise InvalidSchemaError(
                    "Message wrapper does not match schema")

        schema_version: int = wrapper["version"]
        if schema_version not in SUBSCRIPTION_PAYLOAD_VERSIONS:
            metrics.incr(
                "snuba_query_subscriber.message_wrapper_invalid_version")
            raise InvalidMessageError(
                "Version specified in wrapper has no schema")

        payload: Dict[str, Any] = wrapper["payload"]
        with metrics.timer(
                "snuba_query_subscriber.parse_message_value.json_validate_payload"
        ):
            try:
                jsonschema.validate(
                    payload, SUBSCRIPTION_PAYLOAD_VERSIONS[schema_version])
            except jsonschema.ValidationError:
                metrics.incr("snuba_query_subscriber.message_payload_invalid")
                raise InvalidSchemaError(
                    "Message payload does not match schema")
        # XXX: Since we just return the raw dict here, when the payload changes it'll
        # break things. This should convert the payload into a class rather than passing
        # the dict around, but until we get time to refactor we can keep things working
        # here.
        payload.setdefault("values", payload.get("result"))

        payload["timestamp"] = parse_date(
            payload["timestamp"]).replace(tzinfo=pytz.utc)
        return payload
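A sketch of how this consumer might be started, assuming the relevant Sentry settings and subscription handlers are already registered; the group id and the signal wiring are illustrative only:

import signal

subscriber = QuerySubscriptionConsumer(group_id='query-subscription-consumer')  # placeholder group id
signal.signal(signal.SIGTERM, lambda signum, frame: subscriber.shutdown())      # request a clean exit
subscriber.run()   # blocks: polls, dispatches subscription updates, commits offsets in batches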
Example #28
    def __init__(self,
                 host: str,
                 port: int,
                 topic_name: str,
                 max_polling_timeout: float = 0.001,
                 **kwargs):
        """
        Init Kafka RPCClient.

        Unlike most RPC protocols,
        only one KRPCClient can run on a single Kafka topic.

        If you insist on using multiple KRPCClient instances,
        redis must be used; pass the argument use_redis=True.

        Args:
            host: kafka broker host
            port: kafka broker port
            topic_name: kafka topic name; if the topic exists,
                        the existing topic will be used,
                        otherwise a new topic is created.
            max_polling_timeout: maximum time (seconds) to block waiting for a message, event or callback.

            encrypt: default None; if not None, the message will be encrypted with the given password. This slows down performance.
            verify: default False; if True, the message will be verified against the given sha3 checksum from the headers.

            use_redis: default False; if True, use redis as the cache instead of the built-in QueueDict.

        """

        bootstrap_servers = '{}:{}'.format(host, port)

        self.topic_name = topic_name

        self.server_topic = 'krpc_{}_server'.format(topic_name)
        self.client_topic = 'krpc_{}_client'.format(topic_name)

        # set max_polling_timeout
        assert max_polling_timeout > 0, 'max_polling_timeout must be greater than 0'
        self.max_polling_timeout = max_polling_timeout

        self.consumer = Consumer({
            'bootstrap.servers': bootstrap_servers,
            'group.id': 'krpc',
            'auto.offset.reset': 'earliest',
            'auto.commit.interval.ms': 1000
        })
        self.producer = Producer({
            'bootstrap.servers': bootstrap_servers,
            'on_delivery': self.delivery_report,
        })

        # add redis cache, for temporarily storage of returned data
        self.use_redis = kwargs.get('use_redis', False)
        self.expire_time = kwargs.get('expire_time', 600)
        if self.use_redis:
            import redis
            redis_port = kwargs.get('redis_port', 6379)
            redis_db = kwargs.get('redis_db', 0)
            redis_password = kwargs.get('redis_password', None)
            self.cache = redis.Redis(host, redis_port, redis_db,
                                     redis_password)
            self.cache_channel = self.cache.pubsub()
        else:
            self.cache = QueueDict(maxlen=2048, expire=self.expire_time)

        self.consumer.subscribe([self.client_topic])

        # set msgpack packer & unpacker
        self.packer = msgpack.Packer(use_bin_type=True)
        self.unpacker = msgpack.Unpacker(use_list=False, raw=False)

        self.verify = kwargs.get('verify', False)
        self.verification_method = kwargs.get('verification', 'crc32')
        if self.verification_method == 'crc32':
            self.verification_method = lambda x: hex(zlib.crc32(x)).encode()
        elif isinstance(self.verification_method, Callable):
            self.verification_method = self.verification_method
        else:
            raise AssertionError('not supported verification function.')

        self.encrypt = kwargs.get('encrypt', None)
        if self.encrypt is not None:
            self.encrypt = AESEncryption(self.encrypt, encrypt_length=16)

        self.is_closed = False
        # coroutine pool
        self.pool = ThreadAsyncPoolExecutor(pool_size=1)
        self.pool.submit(self.wait_forever)

        # handshake; it's OK not to handshake, but the first rpc would be slow.
        if kwargs.get('handshake', True):
            self.handshaked = False
            self.producer.produce(self.server_topic,
                                  b'handshake',
                                  b'handshake',
                                  headers={'checksum': None})
            self.producer.poll(0.0)
            logger.info('sending handshake')
            while True:
                if self.handshaked:
                    break
                time.sleep(1)

        # acknowledge; disabling ack roughly doubles the speed, but is not entirely safe.
        self.ack = kwargs.get('ack', False)
Beispiel #29
0
if __name__ == '__main__':
    # Step 1. Configure the connection settings for the Kafka cluster
    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    props = {
        'bootstrap.servers': 'localhost:9092',        # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'tdea',                             # Name of the consumer group (replace with your own student ID)
        'auto.offset.reset': 'earliest',                # Whether to start from the offsets this consumer group has not read yet
        'enable.auto.commit': True,                     # Whether to enable auto-commit
        'auto.commit.interval.ms': 5000,                # Auto-commit interval
        'on_commit': print_commit_result,               # Callback invoked with commit results
        'error_cb': error_cb                            # Callback invoked with error events
    }

    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the name of the topic to subscribe to
    topicName = 'ak03.test'
    # Step 4. Subscribe the Consumer to the specified topic
    consumer.subscribe([topicName])

    # Step 5. Continuously pull incoming messages from Kafka
    try:
        while True:
            # Ask Kafka for new messages
            records = consumer.consume(num_messages=500, timeout=1.0)  # batch read
            if not records:
                continue

            for record in records:
                # Check for errors
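
The example is cut off before the per-record error check. A hedged sketch of how that check typically continues, reusing the KafkaError pattern that appears in the other consumers in this collection:

from confluent_kafka import KafkaError


def handle_records(records):
    for record in records:
        # Check each record for errors before touching its payload
        if record.error():
            if record.error().code() == KafkaError._PARTITION_EOF:
                continue  # end-of-partition event, not a real error
            print('Consumer error: {}'.format(record.error()))
            continue
        print('{} [{}] @ {}: {}'.format(record.topic(), record.partition(),
                                        record.offset(),
                                        record.value().decode('utf-8')))
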
Beispiel #30
0
class KRPCClient:
    def __init__(self,
                 host: str,
                 port: int,
                 topic_name: str,
                 max_polling_timeout: float = 0.001,
                 **kwargs):
        """
        Init Kafka RPCClient.

        Unlike most RPC protocols,
        only one KRPCClient can run on a single Kafka topic.

        If you insist on using multiple KRPCClient instances,
        redis must be used; pass the argument use_redis=True.

        Args:
            host: kafka broker host
            port: kafka broker port
            topic_name: kafka topic name; if the topic exists,
                        the existing topic is used,
                        otherwise a new topic is created.
            max_polling_timeout: maximum time (seconds) to block waiting for a message, event or callback.

            encrypt: default None; if not None, messages are encrypted with the given password. This slows down performance.
            verify: default False; if True, each message is verified against the checksum sent in its headers.

            use_redis: default False; if True, use redis as the cache instead of the built-in QueueDict.

        """

        bootstrap_servers = '{}:{}'.format(host, port)

        self.topic_name = topic_name

        self.server_topic = 'krpc_{}_server'.format(topic_name)
        self.client_topic = 'krpc_{}_client'.format(topic_name)

        # set max_polling_timeout
        assert max_polling_timeout > 0, 'max_polling_timeout must be greater than 0'
        self.max_polling_timeout = max_polling_timeout

        self.consumer = Consumer({
            'bootstrap.servers': bootstrap_servers,
            'group.id': 'krpc',
            'auto.offset.reset': 'earliest',
            'auto.commit.interval.ms': 1000
        })
        self.producer = Producer({
            'bootstrap.servers': bootstrap_servers,
            'on_delivery': self.delivery_report,
        })

        # add redis cache, for temporarily storage of returned data
        self.use_redis = kwargs.get('use_redis', False)
        self.expire_time = kwargs.get('expire_time', 600)
        if self.use_redis:
            import redis
            redis_port = kwargs.get('redis_port', 6379)
            redis_db = kwargs.get('redis_db', 0)
            redis_password = kwargs.get('redis_password', None)
            self.cache = redis.Redis(host, redis_port, redis_db,
                                     redis_password)
            self.cache_channel = self.cache.pubsub()
        else:
            self.cache = QueueDict(maxlen=2048, expire=self.expire_time)

        self.consumer.subscribe([self.client_topic])

        # set msgpack packer & unpacker
        self.packer = msgpack.Packer(use_bin_type=True)
        self.unpacker = msgpack.Unpacker(use_list=False, raw=False)

        self.verify = kwargs.get('verify', False)
        self.verification_method = kwargs.get('verification', 'crc32')
        if self.verification_method == 'crc32':
            self.verification_method = lambda x: hex(zlib.crc32(x)).encode()
        elif isinstance(self.verification_method, Callable):
            self.verification_method = self.verification_method
        else:
            raise AssertionError('not supported verification function.')

        self.encrypt = kwargs.get('encrypt', None)
        if self.encrypt is not None:
            self.encrypt = AESEncryption(self.encrypt, encrypt_length=16)

        self.is_closed = False
        # coroutine pool
        self.pool = ThreadAsyncPoolExecutor(pool_size=1)
        self.pool.submit(self.wait_forever)

        # handshake; it's OK not to handshake, but the first rpc would be slow.
        if kwargs.get('handshake', True):
            self.handshaked = False
            self.producer.produce(self.server_topic,
                                  b'handshake',
                                  b'handshake',
                                  headers={'checksum': None})
            self.producer.poll(0.0)
            logger.info('sending handshake')
            while True:
                if self.handshaked:
                    break
                time.sleep(1)

        # acknowledge; disabling ack roughly doubles the speed, but is not entirely safe.
        self.ack = kwargs.get('ack', False)

    @staticmethod
    def delivery_report(err, msg):
        if err is not None:
            logger.error('request failed: {}'.format(err))
        else:
            logger.info('request sent to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def parse_response(self, msg_value):
        try:
            self.unpacker.feed(msg_value)
            res = next(self.unpacker)
        except Exception as e:
            logger.exception(e)
            res = None
        return res

    def call(self, method_name, *args, **kwargs):
        # rpc call timeout
        # WARNING: if the rpc method has an argument named timeout, it will not be passed through.
        timeout = kwargs.pop('timeout', 10)

        start_time = time.time()

        # send request back to server
        req = {'method_name': method_name, 'args': args, 'kwargs': kwargs}

        req = self.packer.pack(req)

        if self.encrypt:
            req = self.encrypt.encrypt(req)

        if self.verify:
            checksum = self.verification_method(req)
        else:
            checksum = None

        task_id = uuid.uuid4().hex

        self.producer.produce(self.server_topic,
                              req,
                              task_id,
                              headers={'checksum': checksum})

        # waiting for response from server sync/async
        res = self.poll_result_from_redis_cache(task_id, timeout)

        if self.ack:
            self.producer.poll(0.0)

        # do something to the response
        ret = res['ret']
        tact_time_server = res['tact_time']
        server_id = res['server_id']

        end_time = time.time()

        return {
            'ret': ret,
            'tact_time': end_time - start_time,
            'tact_time_server': tact_time_server,
            'server_id': server_id
        }

    def wait_forever(self):
        while True:
            if self.is_closed:
                logger.info('user exit')
                break

            try:
                msg = self.consumer.poll(self.max_polling_timeout)

                if msg is None:
                    continue
                if msg.error():
                    logger.error("consumer error: {}".format(msg.error()))
                    continue

                task_id = msg.key()  # a uuid: the only id that pairs the request with the response

                if task_id == b'handshake':
                    logger.info('handshake succeeded.')
                    self.handshaked = True
                    continue

                res = msg.value()
                headers = msg.headers()
                checksum = headers[0][1]

                if self.verify:
                    signature = self.verification_method(res)
                    if checksum != signature:
                        logger.error(
                            'checksum mismatch of task {}'.format(task_id))
                        continue

                if self.use_redis:
                    self.cache.publish(task_id, res)
                    self.cache.set(task_id, res)
                    self.cache.expire(task_id, self.expire_time)
                else:
                    self.cache[task_id] = res

                # send signal for polling to search for result
                ...

            except Exception as e:
                logger.exception(e)

    def poll_result_from_redis_cache(self, task_id, timeout=10):
        """
        poll_result_from_cache after receiving a signal from waiting
        Args:
            task_id:
            timeout:

        Returns:

        """
        loop_times = int(timeout / self.max_polling_timeout)
        task_id = task_id.encode()
        if self.use_redis:
            self.cache_channel.subscribe(task_id)

            for _ in range(loop_times):
                # if no completion, get message from subscribed channel
                message = self.cache_channel.get_message(
                    timeout=self.max_polling_timeout)
                # else get response from redis db cache
                if message is None:
                    res = self.cache.get(task_id)

                    # if still no response yet, continue polling
                    if res is None:
                        continue
                    break

                if isinstance(message, dict):
                    if isinstance(message['data'], int):
                        continue

                res = message['data']
                break
        else:
            for _ in range(loop_times):
                try:
                    res = self.cache[task_id]
                    break
                except:
                    time.sleep(self.max_polling_timeout)

        try:
            if self.encrypt:
                res = self.encrypt.decrypt(res)

            res = self.parse_response(res)
        except NameError:
            raise TimeoutError

        return res

    def __getattr__(self, method_name):
        return lambda *args, **kwargs: self.call(method_name, *args, **kwargs)

    def close(self):
        self.is_closed = True
        if self.use_redis:
            self.cache_channel.close()
            self.cache.close()
        self.consumer.close()
        self.producer.flush()
        self.pool.shutdown()
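
A hedged usage sketch of the client above. The broker address, topic and method name ('add') are placeholders, and a krpc server exposing a matching method is assumed to be running on the same topic:

# a minimal sketch, not part of the original class
client = KRPCClient('localhost', 9092, 'demo', handshake=True)
try:
    # dispatched via __getattr__ -> call(); 'timeout' is popped before sending
    result = client.add(1, 2, timeout=10)
    print(result['ret'], result['tact_time'], result['tact_time_server'], result['server_id'])
finally:
    client.close()
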
class KafkaConsumer:
    def __init__(self, broker_manager, msg_monitor, consumer_id, test_number):
        self.consumer = None
        self.broker_manager = broker_manager
        self.msg_monitor = msg_monitor
        self.consumer_id = consumer_id
        self.actor = f"CONSUMER(Test:{test_number} Id:C{consumer_id})"
        self.terminate = False
        self.topic = None
        self.on_assignment_ctr = 0

    def get_partitions(self, partitions):
        ps = list()
        for p in partitions:
            ps.append(str(p.partition))

        if len(ps) == 0:
            return "none"
        else:
            return ",".join(ps)

    def on_assignment(self, con, partitions):
        console_out(f"Assigned partitions: {self.get_partitions(partitions)}",
                    self.actor)

        if self.on_assignment_ctr == 0:
            self.on_assignment_ctr += 1
            for part in partitions:
                part.offset = 0

        self.consumer.assign(partitions)

    def on_revoke(self, con, partitions):
        console_out(
            f"Unassigned partitions: {self.get_partitions(partitions)}",
            self.actor)
        self.consumer.unassign()

    def create_consumer(self, group_id, topic):
        self.terminate = False
        console_out(
            f"Creating a consumer with bootstrap.servers: {self.broker_manager.get_bootstrap_servers()}",
            self.actor)
        self.consumer = Consumer({
            'bootstrap.servers':
            self.broker_manager.get_bootstrap_servers(),
            'api.version.request':
            True,
            'enable.auto.commit':
            True,
            'group.id':
            group_id,
            'auto.offset.reset':
            'earliest',
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        })
        self.topic = topic

    def subscribe(self):
        subscribed = False
        while not subscribed:
            try:
                console_out(f"Starting subscription to {self.topic}",
                            self.actor)
                self.consumer.subscribe([self.topic],
                                        on_assign=self.on_assignment,
                                        on_revoke=self.on_revoke)
                console_out(f"Subscribed to {self.topic}", self.actor)
                subscribed = True
            except KafkaError as e:
                console_out(f"Failed to subscribe: {e}", self.actor)
                time.sleep(5)

    def start_consuming(self):
        self.subscribe()

        try:
            msg_ctr = 0
            while not self.terminate:
                msg = self.consumer.poll(2.0)

                if msg is None:
                    continue
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        continue
                    else:
                        console_out(msg.error(), self.actor)
                        break

                self.msg_monitor.append(msg.value(), self.consumer_id,
                                        self.actor)

            console_out("Consumption terminated", self.actor)
            self.consumer.close()
        except Exception as e:
            console_out("Consumption terminated due to error", self.actor)
            template = "An exception of type {0} occurred. Arguments:{1!r}"
            message = template.format(type(e).__name__, e.args)
            console_out(message, self.actor)

    def stop_consuming(self):
        self.terminate = True
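
A hedged sketch of how a test harness might drive the consumer above; broker_manager and msg_monitor are assumed to be harness objects constructed elsewhere:

import threading

# a minimal sketch with placeholder harness objects and names
consumer = KafkaConsumer(broker_manager, msg_monitor, consumer_id=1, test_number=1)
consumer.create_consumer(group_id='test-group-1', topic='test-topic')

worker = threading.Thread(target=consumer.start_consuming)
worker.start()

# ... run the test, then shut the consumer down
consumer.stop_consuming()
worker.join()
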
from confluent_kafka import Consumer

c = Consumer({
    'bootstrap.servers': 'mybroker',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

c.subscribe(['mytopic'])

while True:
    msg = c.poll(1.0)

    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue

    print('Received message: {}'.format(msg.value().decode('utf-8')))

c.close()
Beispiel #33
0
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        #self.broker_properties = {
        #    #
        #    # TODO
        #    #
        #    "kafka" : "PLAINTEXT://localhost:9092",
        #    "schema_registry" : "http://localhost:8081"
        #}
        self.broker_properties = {
            "bootstrap.servers": "localhost:9092",
            #"bootstrap.servers": "PLAINTEXT://localhost:9092",
            "group.id": "udacity",
            "auto.offset.reset": "earliest" if offset_earliest else "latest"
        }

        # TODO: Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
            #self.consumer = AvroConsumer(
            #    {
            #        "bootstrap.servers": self.broker_properties["kafka"],
            #        "schema.registry.url": self.broker_properties["schema_registry"],
            #        "group.id": "0",
            #        "auto.offset.reset": "earliest"
            #        self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            #
            #    }
            #)
            logger.info("__init__ - AvroConsumer was created")
        else:
            self.consumer = Consumer(self.broker_properties)
            #self.consumer = Consumer(
            #    {
            #        "bootstrap.servers": self.broker_properties["kafka"],
            #        "group.id": "0",
            #        "auto.offset.reset": "earliest"
            #    }
            #)
            #pass
            logger.info("__init__ - Consumer was created")

        #
        #
        # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about
        # how the `on_assign` callback should be invoked.
        #
        #
        logger.info("Consumer will subscribe - %s", self.topic_name_pattern)
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
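
The TODO above asks how the on_assign callback should be invoked. A hedged sketch of one common shape for that callback, rewinding partitions only when offset_earliest was requested; the method and attribute names mirror the __init__ above, but this is an assumption, not the project's actual implementation:

from confluent_kafka import OFFSET_BEGINNING

def on_assign(self, consumer, partitions):
    """Rebalance callback passed to subscribe(); see the __init__ above."""
    if self.offset_earliest:
        # Rewind every newly assigned partition to the beginning of the log.
        for partition in partitions:
            partition.offset = OFFSET_BEGINNING
    logger.info("partitions assigned for %s", self.topic_name_pattern)
    consumer.assign(partitions)
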
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb (err, partitions):
        pass

    kc = Consumer({'group.id':'test', 'socket.timeout.ms':'100',
                   'session.timeout.ms': 1000, # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke (consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = list(map(lambda p: TopicPartition("test", p), range(0,100,3)))
    kc.assign(partitions)

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT


    kc.close()
Beispiel #35
0
    def run(self) -> None:
        def fail_fast(err: Any, _msg: Any) -> None:
            if err is not None:
                print("Kafka producer delivery error: {}".format(err))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)

        def on_commit(err: Any, partitions: List[Any]) -> None:
            if err is not None:
                print("Kafka consumer commit error: {}".format(err))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)
            for p in partitions:
                # check for partition-specific commit errors
                print(p)
                if p.error:
                    print("Kafka consumer commit error: {}".format(p.error))
                    print("Bailing out...")
                    # TODO: should it be sys.exit(-1)?
                    raise KafkaException(p.error)
            print("Kafka consumer commit successful")
            pass

        def on_rebalance(consumer: Consumer, partitions: List[Any]) -> None:
            for p in partitions:
                if p.error:
                    raise KafkaException(p.error)
            print("Kafka partitions rebalanced: {} / {}".format(
                consumer, partitions))

        consumer_conf = self.kafka_config.copy()
        consumer_conf.update({
            "group.id": self.consumer_group,
            "on_commit": fail_fast,
            # messages don't have offset marked as stored until pushed to
            # elastic, but we do auto-commit stored offsets to broker
            "enable.auto.commit": True,
            "enable.auto.offset.store": False,
            # user code timeout; if no poll after this long, assume user code
            # hung and rebalance (default: 5min)
            "max.poll.interval.ms": 180000,
            "default.topic.config": {
                "auto.offset.reset": "latest",
            },
        })
        consumer = Consumer(consumer_conf)

        producer_conf = self.kafka_config.copy()
        producer_conf.update({
            "delivery.report.only.error": True,
            "default.topic.config": {
                "request.required.acks": -1,  # all brokers must confirm
            },
        })
        producer = Producer(producer_conf)

        consumer.subscribe(
            [self.consume_topic],
            on_assign=on_rebalance,
            on_revoke=on_rebalance,
        )
        print("Kafka consuming {}".format(self.consume_topic))

        while True:
            msg = consumer.poll(self.poll_interval)
            if not msg:
                print("nothing new from kafka (poll_interval: {} sec)".format(
                    self.poll_interval))
                continue
            if msg.error():
                raise KafkaException(msg.error())

            cle = json.loads(msg.value().decode("utf-8"))
            # print(cle)
            print("processing changelog index {}".format(cle["index"]))
            release_ids = []
            new_release_ids = []
            file_ids = []
            fileset_ids = []
            webcapture_ids = []
            container_ids = []
            work_ids = []
            release_edits = cle["editgroup"]["edits"]["releases"]
            for re in release_edits:
                release_ids.append(re["ident"])
                # filter to direct release edits which are not updates
                if not re.get("prev_revision") and not re.get(
                        "redirect_ident"):
                    new_release_ids.append(re["ident"])
            file_edits = cle["editgroup"]["edits"]["files"]
            for e in file_edits:
                file_ids.append(e["ident"])
            fileset_edits = cle["editgroup"]["edits"]["filesets"]
            for e in fileset_edits:
                fileset_ids.append(e["ident"])
            webcapture_edits = cle["editgroup"]["edits"]["webcaptures"]
            for e in webcapture_edits:
                webcapture_ids.append(e["ident"])
            container_edits = cle["editgroup"]["edits"]["containers"]
            for e in container_edits:
                container_ids.append(e["ident"])
            work_edits = cle["editgroup"]["edits"]["works"]
            for e in work_edits:
                work_ids.append(e["ident"])

            # TODO: do these fetches in parallel using a thread pool?
            for ident in set(file_ids):
                file_entity = self.api.get_file(ident, expand=None)
                # update release when a file changes
                # TODO: also fetch old version of file and update any *removed*
                # release idents (and same for filesets, webcapture updates)
                release_ids.extend(file_entity.release_ids or [])
                file_dict = self.api.api_client.sanitize_for_serialization(
                    file_entity)
                producer.produce(
                    self.file_topic,
                    json.dumps(file_dict).encode("utf-8"),
                    key=ident.encode("utf-8"),
                    on_delivery=fail_fast,
                )

            # TODO: topic for fileset updates
            for ident in set(fileset_ids):
                fileset_entity = self.api.get_fileset(ident, expand=None)
                # update release when a fileset changes
                release_ids.extend(fileset_entity.release_ids or [])

            # TODO: topic for webcapture updates
            for ident in set(webcapture_ids):
                webcapture_entity = self.api.get_webcapture(ident, expand=None)
                # update release when a webcapture changes
                release_ids.extend(webcapture_entity.release_ids or [])

            for ident in set(container_ids):
                container = self.api.get_container(ident)
                container_dict = self.api.api_client.sanitize_for_serialization(
                    container)
                producer.produce(
                    self.container_topic,
                    json.dumps(container_dict).encode("utf-8"),
                    key=ident.encode("utf-8"),
                    on_delivery=fail_fast,
                )

            for ident in set(release_ids):
                release = self.api.get_release(
                    ident,
                    expand="files,filesets,webcaptures,container,creators")
                if release.work_id:
                    work_ids.append(release.work_id)
                release_dict = self.api.api_client.sanitize_for_serialization(
                    release)
                producer.produce(
                    self.release_topic,
                    json.dumps(release_dict).encode("utf-8"),
                    key=ident.encode("utf-8"),
                    on_delivery=fail_fast,
                )
                # for ingest requests, filter to "new" active releases with no matched files
                if release.ident in new_release_ids:
                    ir = release_ingest_request(
                        release, ingest_request_source="fatcat-changelog")
                    if ir and not release.files and self.want_live_ingest(
                            release, ir):
                        producer.produce(
                            self.ingest_file_request_topic,
                            json.dumps(ir).encode("utf-8"),
                            # key=None,
                            on_delivery=fail_fast,
                        )

            # send work updates (just ident and changelog metadata) to scholar for re-indexing
            for ident in set(work_ids):
                assert ident
                key = f"work_{ident}"
                work_ident_dict = dict(
                    key=key,
                    type="fatcat_work",
                    work_ident=ident,
                    updated=cle["timestamp"],
                    fatcat_changelog_index=cle["index"],
                )
                producer.produce(
                    self.work_ident_topic,
                    json.dumps(work_ident_dict).encode("utf-8"),
                    key=key.encode("utf-8"),
                    on_delivery=fail_fast,
                )

            producer.flush()
            # TODO: publish updated 'work' entities to a topic
            consumer.store_offsets(message=msg)
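
The worker above relies on a store-then-auto-commit pattern: enable.auto.offset.store is disabled so offsets are only marked as stored after a message has been fully processed, and the auto-committer then commits the stored offsets in the background. A condensed sketch of just that pattern, with placeholder broker and topic names and a hypothetical process() step:

from confluent_kafka import Consumer, KafkaException

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',   # placeholder
    'group.id': 'changelog-workers',         # placeholder
    'enable.auto.commit': True,              # commit *stored* offsets in the background
    'enable.auto.offset.store': False,       # never store offsets automatically
})
consumer.subscribe(['example-topic'])        # placeholder topic

while True:
    msg = consumer.poll(5.0)
    if msg is None:
        continue
    if msg.error():
        raise KafkaException(msg.error())
    process(msg.value())                     # hypothetical processing step
    consumer.store_offsets(message=msg)      # only now is the offset eligible for commit
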
from confluent_kafka import Consumer, KafkaError, TopicPartition

c = Consumer({
    'bootstrap.servers': '10.211.55.3:29092',
    'group.id': 'mygroup2',
    'default.topic.config': {
        'auto.offset.reset': 'smallest'  # largest
    }
})

# tp = TopicPartition("mytopic", 2, 0)
# c.assign([tp])
# c.seek(tp)

c.subscribe(['mytopic'])

while True:
    msg = c.poll(1.0)

    if msg is None:
        continue
    if msg.error():
        if msg.error().code() == KafkaError._PARTITION_EOF:
            continue
        else:
            print(msg.error())
            break

    print('Received message: {} {} {}'.format(msg.value().decode('utf-8'),
                                              msg.topic(), msg.partition()))
Beispiel #37
0
    def run(self) -> None:
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        def on_assign(consumer: Consumer,
                      partitions: List[TopicPartition]) -> None:
            updated_partitions: List[TopicPartition] = []
            for partition in partitions:
                if self.resolve_partition_force_offset:
                    partition = self.resolve_partition_force_offset(partition)
                    updated_partitions.append(partition)

                if partition.offset == OFFSET_INVALID:
                    updated_offset = None
                else:
                    updated_offset = partition.offset
                self.offsets[partition.partition] = updated_offset
            if updated_partitions:
                self.consumer.assign(updated_partitions)
            logger.info(
                "query-subscription-consumer.on_assign",
                extra={
                    "offsets": str(self.offsets),
                    "partitions": str(partitions),
                },
            )

        def on_revoke(consumer: Consumer,
                      partitions: List[TopicPartition]) -> None:
            partition_numbers = [
                partition.partition for partition in partitions
            ]
            self.commit_offsets(partition_numbers)
            for partition_number in partition_numbers:
                self.offsets.pop(partition_number, None)
            logger.info(
                "query-subscription-consumer.on_revoke",
                extra={
                    "offsets": str(self.offsets),
                    "partitions": str(partitions),
                },
            )

        self.consumer = Consumer(self.cluster_options)
        self.__shutdown_requested = False

        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            admin_client = AdminClient(self.admin_cluster_options)
            wait_for_topics(admin_client, [self.topic])

        self.consumer.subscribe([self.topic],
                                on_assign=on_assign,
                                on_revoke=on_revoke)

        i = 0
        while not self.__shutdown_requested:
            message = self.consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise KafkaException(error)

            i = i + 1

            with sentry_sdk.start_transaction(
                    op="handle_message",
                    name="query_subscription_consumer_process_message",
                    sampled=random() <=
                    options.get("subscriptions-query.sample-rate"),
            ), metrics.timer("snuba_query_subscriber.handle_message"):
                self.handle_message(message)

            # Track latest completed message here, for use in `shutdown` handler.
            self.offsets[message.partition()] = message.offset() + 1

            if i % self.commit_batch_size == 0:
                logger.debug("Committing offsets")
                self.commit_offsets()

        logger.debug("Committing offsets and closing consumer")
        self.commit_offsets()
        self.consumer.close()
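
commit_offsets itself is not part of this snippet. A hedged guess at its shape, derived only from how self.offsets, self.topic and self.consumer are used above (the real implementation may differ):

from confluent_kafka import TopicPartition

def commit_offsets(self, partition_numbers=None):
    # a minimal sketch, not the actual implementation
    if partition_numbers is None:
        partition_numbers = list(self.offsets.keys())
    to_commit = [
        TopicPartition(self.topic, partition, offset)
        for partition, offset in self.offsets.items()
        if partition in partition_numbers and offset is not None
    ]
    if to_commit:
        self.consumer.commit(offsets=to_commit, asynchronous=False)
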
Beispiel #38
0
 def setup(self):
     self.consumer = Consumer(**self.get_consumer_settings())
     self.serializer = self.get_message_serializer()
     self.set_topic()
Beispiel #39
0
 def _create_consumer(self, config) -> Consumer:
     return Consumer(config)
class VerifiableConsumer(VerifiableClient):
    """
    confluent-kafka-python backed VerifiableConsumer class for use with
    Kafka's kafkatests client tests.
    """
    def __init__(self, conf):
        """
        conf is a config dict passed to confluent_kafka.Consumer()
        """
        super(VerifiableConsumer, self).__init__(conf)
        self.conf['on_commit'] = self.on_commit
        self.consumer = Consumer(**conf)
        self.consumed_msgs = 0
        self.consumed_msgs_last_reported = 0
        self.consumed_msgs_at_last_commit = 0
        self.use_auto_commit = False
        self.use_async_commit = False
        self.max_msgs = -1
        self.assignment = []
        self.assignment_dict = dict()

    def find_assignment(self, topic, partition):
        """ Find and return existing assignment based on topic and partition,
        or None on miss. """
        skey = '%s %d' % (topic, partition)
        return self.assignment_dict.get(skey)

    def send_records_consumed(self, immediate=False):
        """ Send records_consumed, every 100 messages, on timeout,
            or if immediate is set. """
        if self.consumed_msgs <= self.consumed_msgs_last_reported + (0 if immediate else 100):
            return

        if len(self.assignment) == 0:
            return

        d = {'name': 'records_consumed',
             'count': self.consumed_msgs - self.consumed_msgs_last_reported,
             'partitions': []}

        for a in self.assignment:
            if a.min_offset == -1:
                # Skip partitions that haven't had any messages since last time.
                # This is to circumvent some minOffset checks in kafkatest.
                continue
            d['partitions'].append(a.to_dict())
            a.min_offset = -1

        self.send(d)
        self.consumed_msgs_last_reported = self.consumed_msgs

    def send_assignment(self, evtype, partitions):
        """ Send assignment update, evtype is either 'assigned' or 'revoked' """
        d = {'name': 'partitions_' + evtype,
             'partitions': [{'topic': x.topic, 'partition': x.partition} for x in partitions]}
        self.send(d)

    def on_assign(self, consumer, partitions):
        """ Rebalance on_assign callback """
        old_assignment = self.assignment
        self.assignment = [AssignedPartition(p.topic, p.partition) for p in partitions]
        # Move over our last seen offsets so that we can report a proper
        # minOffset even after a rebalance loop.
        for a in old_assignment:
            b = self.find_assignment(a.topic, a.partition)
            b.min_offset = a.min_offset

        self.assignment_dict = {a.skey: a for a in self.assignment}
        self.send_assignment('assigned', partitions)

    def on_revoke(self, consumer, partitions):
        """ Rebalance on_revoke callback """
        # Send final consumed records prior to rebalancing to make sure
        # latest consumed is in par with what is going to be committed.
        self.send_records_consumed(immediate=True)
        self.do_commit(immediate=True, asynchronous=False)
        self.assignment = list()
        self.assignment_dict = dict()
        self.send_assignment('revoked', partitions)

    def on_commit(self, err, partitions):
        """ Offsets Committed callback """
        if err is not None and err.code() == KafkaError._NO_OFFSET:
            self.dbg('on_commit(): no offsets to commit')
            return

        # Report consumed messages to make sure consumed position >= committed position
        self.send_records_consumed(immediate=True)

        d = {'name': 'offsets_committed',
             'offsets': []}

        if err is not None:
            d['success'] = False
            d['error'] = str(err)
        else:
            d['success'] = True
            d['error'] = ''

        for p in partitions:
            pd = {'topic': p.topic, 'partition': p.partition, 'offset': p.offset}
            if p.error is not None:
                pd['error'] = str(p.error)
            d['offsets'].append(pd)

        if len(self.assignment) == 0:
            self.dbg('Not sending offsets_committed: No current assignment: would be: %s' % d)
            return

        self.send(d)

    def do_commit(self, immediate=False, asynchronous=None):
        """ Commit every 1000 messages or whenever there is a consume timeout
            or immediate. """
        if (self.use_auto_commit
                or self.consumed_msgs_at_last_commit + (0 if immediate else 1000) >
                self.consumed_msgs):
            return

        # Make sure we report consumption before commit,
        # otherwise tests may fail because of commit > consumed
        if self.consumed_msgs_at_last_commit < self.consumed_msgs:
            self.send_records_consumed(immediate=True)

        if asynchronous is None:
            async_mode = self.use_async_commit
        else:
            async_mode = asynchronous

        self.dbg('Committing %d messages (Async=%s)' %
                 (self.consumed_msgs - self.consumed_msgs_at_last_commit,
                  async_mode))

        retries = 3
        while True:
            try:
                self.dbg('Commit')
                offsets = self.consumer.commit(asynchronous=async_mode)
                self.dbg('Commit done: offsets %s' % offsets)

                if not async_mode:
                    self.on_commit(None, offsets)

                break

            except KafkaException as e:
                if e.args[0].code() == KafkaError._NO_OFFSET:
                    self.dbg('No offsets to commit')
                    break
                elif e.args[0].code() in (KafkaError.REQUEST_TIMED_OUT,
                                          KafkaError.NOT_COORDINATOR_FOR_GROUP,
                                          KafkaError._WAIT_COORD):
                    self.dbg('Commit failed: %s (%d retries)' % (str(e), retries))
                    if retries <= 0:
                        raise
                    retries -= 1
                    time.sleep(1)
                    continue
                else:
                    raise

        self.consumed_msgs_at_last_commit = self.consumed_msgs

    def msg_consume(self, msg):
        """ Handle consumed message (or error event) """
        if msg.error():
            self.err('Consume failed: %s' % msg.error(), term=False)
            return

        if False:
            self.dbg('Read msg from %s [%d] @ %d' %
                     (msg.topic(), msg.partition(), msg.offset()))

        if self.max_msgs >= 0 and self.consumed_msgs >= self.max_msgs:
            return  # ignore extra messages

        # Find assignment.
        a = self.find_assignment(msg.topic(), msg.partition())
        if a is None:
            self.err('Received message on unassigned partition %s [%d] @ %d' %
                     (msg.topic(), msg.partition(), msg.offset()), term=True)

        a.consumed_msgs += 1
        if a.min_offset == -1:
            a.min_offset = msg.offset()
        if a.max_offset < msg.offset():
            a.max_offset = msg.offset()

        self.consumed_msgs += 1

        self.consumer.store_offsets(message=msg)
        self.send_records_consumed(immediate=False)
        self.do_commit(immediate=False)
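
The driving poll loop is not included in this snippet. A hedged sketch of what it typically looks like for this class, using only the methods and counters defined above:

def run_consumer_loop(vc, topics):
    # a minimal sketch; vc is a VerifiableConsumer instance, topics a list of topic names
    vc.consumer.subscribe(topics, on_assign=vc.on_assign, on_revoke=vc.on_revoke)
    while vc.max_msgs < 0 or vc.consumed_msgs < vc.max_msgs:
        msg = vc.consumer.poll(timeout=1.0)
        if msg is None:
            # consume timeout: flush pending reports and commit what we have
            vc.send_records_consumed(immediate=True)
            vc.do_commit(immediate=True)
            continue
        vc.msg_consume(msg)
    vc.consumer.close()
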
Beispiel #41
0
from confluent_kafka import Consumer, KafkaError, KafkaException
import sys

conf = {
    "bootstrap.servers": "0.0.0.0:9092,0.0.0.0:9092",
    "group.id": "foo",
    "auto.offset.reset": "smallest",
    "enable.auto.commit": True,
}

consumer = Consumer(conf)

try:
    consumer.subscribe(["test2"])
    while True:

        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                # End of partition event
                sys.stderr.write("%% %s [%d] reached end at offset %d\n" %
                                 (msg.topic(), msg.partition(), msg.offset()))
            elif msg.error():
                raise KafkaException(msg.error())
        else:
            print(msg.value())
            message = msg.value()
 def __init__(self, conf, topic_name):
     self.consumer = Consumer(conf)
     self.topic_name = topic_name
     self.running = True
     self._observers = []
Beispiel #43
0
if __name__ == '__main__':

    # Initialization
    args = ccloud_lib.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib.read_ccloud_config(config_file)

    # Create Consumer instance
    # 'auto.offset.reset=earliest' to start reading from the beginning of the
    #   topic if no committed offsets exist
    c = Consumer({
        'bootstrap.servers': conf['bootstrap.servers'],
        'sasl.mechanisms': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'sasl.username': conf['sasl.username'],
        'sasl.password': conf['sasl.password'],
        'group.id': 'python_example_group_1',
        'auto.offset.reset': 'earliest'
    })

    # Subscribe to topic
    c.subscribe([topic])

    # Process messages
    total_count = 0
    try:
        while True:
            print("Waiting for message or event/error in poll()")
            msg = c.poll(1.0)
            if msg is None:
Beispiel #44
0
class EventProcessor(abc.ABC):

    _DEFAULT_KAFKA_CONSUMER_CONFIG = {
        'bootstrap.servers': 'kafka:9092',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 10000,
        'session.timeout.ms': 30000,
        'fetch.max.bytes': 5000012,
        'auto.offset.reset': 'latest',
    }

    _DEFAULT_KAFKA_PRODUCER_CONFIG = {
        'bootstrap.servers': 'kafka:9092',
        'acks': 'all',
        'retries': 0,
        'linger.ms': 20,
    }

    def __init__(self, **kwargs):
        """initialize EventProcessor with Kafka Prodcuer and Consumer"""
        self.logger = logging.getLogger(__name__)

        self._input_topics = kwargs.get('input_topics')
        self._output_topics = kwargs.get('output_topics')
        self._invocation_id = kwargs.get('invocation_id')
        self._bootstrap_servers = kwargs.get('bootstrap_servers')
        self.static_properties = kwargs.get('static_properties')

        self._running = False
        self._threads = {}

        if self._bootstrap_servers is not None:
            self._DEFAULT_KAFKA_CONSUMER_CONFIG[
                'bootstrap.servers'] = self._bootstrap_servers
            self._DEFAULT_KAFKA_PRODUCER_CONFIG[
                'bootstrap.servers'] = self._bootstrap_servers

        self._DEFAULT_KAFKA_CONSUMER_CONFIG[
            'group.id'] = 'streampipes_python_' + self._invocation_id

        self._producer = Producer(self._DEFAULT_KAFKA_PRODUCER_CONFIG)
        self._consumer = Consumer(self._DEFAULT_KAFKA_CONSUMER_CONFIG)
        #self._create_topic(topic=self._output_topics, conf=self._DEFAULT_KAFKA_PRODUCER_CONFIG)

        self.on_invocation()

    def init(self):
        self.logger.info('start processor {}'.format(self.invocation_id))
        thread = threading.Thread(target=self._consume,
                                  name=self.invocation_id)
        thread.start()
        self._threads['kafka'] = thread

    def active_threads(self):
        return self._threads

    @property
    def invocation_id(self):
        return self._invocation_id

    def __del__(self):
        pass

    @abc.abstractmethod
    def on_invocation(self):
        """ on_invocation is called when processor is started """

    @abc.abstractmethod
    def on_event(self, event):
        """ on_event receives kafka consumer messages """
        pass

    @abc.abstractmethod
    def on_detach(self):
        """ on_detach is called when processor is stopped """
        pass

    def _on_event(self, event):
        result = self.on_event(event)

        if result is not None:
            self._produce(result)

    def _consume(self):
        """ retrieve events from kafka """
        self._consumer.subscribe(topics=[self._input_topics])
        self._running = True

        while self._running:
            # fetch records from kafka and send to
            msg = self._consumer.poll(timeout=1.0)

            if msg is None:
                continue
            elif msg.error():
                if msg.error().str() != "Broker: No more messages":
                    self.logger.error("Consumer error: {}".format(msg.error()))
                    continue
            else:
                try:
                    # json -> dict
                    event = json.loads(msg.value().decode('utf-8'))
                    if isinstance(event, int):
                        self.logger.info(
                            "Integer not allowed {}".format(event))
                        continue
                except ValueError as e:
                    self.logger.info("Not a valid json {}".format(e))
                    continue

                self._on_event(event)

    def _produce(self, result):
        """ send events to kafka """
        event = json.dumps(result).encode('utf-8')
        try:
            # dict -> json
            self._producer.produce(self._output_topics, value=event)
        except BufferError:
            self._producer.poll(1)

    # def _create_topic(self, topic=None, conf=None):
    #     """ Create the topic if it doesn't exist """
    #     admin = AdminClient(conf)
    #     fs = admin.create_topics([NewTopic(topic, num_partitions=1, replication_factor=1)])
    #     f = fs[topic]
    #     try:
    #         f.result()
    #     except KafkaException as ex:
    #         if ex.args[0].code() == KafkaError.TOPIC_ALREADY_EXISTS:
    #             self.logger.warning("Topic {} already exists: continue".format(topic))
    #         else:
    #             raise

    def stop(self):
        self.logger.info('stop processor {}'.format(self.invocation_id))
        self._running = False
        self._consumer.close()
        self._producer.flush()
        self.on_detach()
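
A hedged sketch of how a concrete processor might subclass EventProcessor; the topic names, invocation id and transformation are placeholders:

class UppercaseProcessor(EventProcessor):
    # a minimal sketch; real processors would do meaningful work in on_event
    def on_invocation(self):
        self.logger.info('processor attached')

    def on_event(self, event):
        # event is the decoded JSON dict from Kafka; the returned dict is
        # serialized back to the output topic by _produce()
        return {k: str(v).upper() for k, v in event.items()}

    def on_detach(self):
        self.logger.info('processor detached')


processor = UppercaseProcessor(input_topics='org.example.input',      # placeholder
                               output_topics='org.example.output',    # placeholder
                               invocation_id='demo-1',                # placeholder
                               bootstrap_servers='localhost:9092')
processor.init()
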
Beispiel #45
0
from confluent_kafka import Consumer, KafkaError
import sys
import uuid

topic = sys.argv[1]

c = Consumer({
    #'bootstrap.servers': '172.17.0.3:9092,172.17.0.4:9093,172.17.0.5:9094',
    'bootstrap.servers': '172.17.0.4:9093,172.17.0.5:9094',
    'api.version.request': True,
    'enable.auto.commit': True,
    'group.id': str(uuid.uuid1()),
    'auto.offset.reset': 'earliest'
    #'default.topic.config': {
    #    'auto.offset.reset': 'smallest'
    #}
})


def print_assignment(consumer, partitions):
    for p in partitions:
        p.offset = 0
    print('assign', partitions)
    consumer.assign(partitions)

    # Subscribe to topics


c.subscribe([topic], on_assign=print_assignment)

while True:
Beispiel #46
0
class SyncReport:
    def __init__(self, group, token, optimalq_connector, pool_uid,
                 call_reports_topic):
        self._consumer = Consumer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": "",
            'group.id': group,
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest'
        })
        self._consumer.subscribe([call_reports_topic])
        self._headers = {
            "X-Auth-Token": "{}".format(token),
            "Content-Type": "application/json"
        }
        self._optimalq_connector = optimalq_connector
        self._pool_uid = pool_uid
        self._optimalq_url = ''

    def start(self):
        """
        Get messages from call_report_topic.
        Send the call reports to post_call_report
        """
        while True:
            msg = self._consumer.poll(0.1)
            if msg is None:
                continue
            elif not msg.error():  #Received message
                self.post_call_report(msg.value())
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                logging.info('End of partition reached {}/{}'.format(
                    msg.topic(), msg.partition()))
            else:
                logging.error('Error occurred: {}'.format(msg.error().str()))

    def post_call_report(self, call_report):
        """
        Post call report json to OptimalQ API by pool uid
        :param call_report:
        :return:
        """
        url = '{}/v1/pools/{}/call_reports'.format(self._optimalq_url,
                                                   self._pool_uid)
        success_post_call_report = requests.post(url=url,
                                                 data=call_report,
                                                 headers=self._headers)
        code = success_post_call_report.status_code
        counter = 5

        while (counter > 0) and ((code < 200) or (code > 299)):
            counter -= 1
            token = self._optimalq_connector.get_token()

            if token is not None:
                self._headers = {
                    "X-Auth-Token": "{}".format(token),
                    "Content-Type": "application/json"
                }

            success_post_call_report = requests.post(url=url,
                                                     data=call_report,
                                                     headers=self._headers)
            code = success_post_call_report.status_code

        if (code > 199) and (code < 300):
            self._consumer.commit()
            logging.info('Sent call report for pool: {}'.format(
                self._pool_uid))
            return

        logging.error(
            'Connection to OptimalQ failed while trying to send call report {}. code: {}, error: {}'
            .format(self._pool_uid, code, success_post_call_report.content))

    def terminate(self):
        self._consumer.close()
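
A hedged usage sketch of the class above; the credentials, pool uid and topic are placeholders, and the connector is only assumed to expose the get_token() method used in post_call_report:

# a minimal sketch with placeholder values
reporter = SyncReport(group='call-report-sync',
                      token='initial-token',
                      optimalq_connector=my_connector,   # hypothetical object with get_token()
                      pool_uid='pool-123',
                      call_reports_topic='call-reports')
try:
    reporter.start()          # blocks, polling Kafka and posting call reports
finally:
    reporter.terminate()
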
Beispiel #47
0
    dest="topic",
    help="Topic to listen",
)
parser.add_option(
    "-s",
    "--servers",
    #default=str(DEFAULT_SERVER),
    dest="servers",
    help="Kafka servers",
)

(options, _) = parser.parse_args()

c = Consumer({
    'bootstrap.servers': options.servers,
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

c.subscribe([options.topic])
#c.subscribe(['Cisco-IOS-XR-qos-ma-oper.qos.nodes.node.policy-map.interface-table.interface.member-interfaces.member-interface.output.service-policy-names.service-policy-instance.statistics'])

print("waiting for packets")
while True:
    msg = c.poll(1.0)

    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
Beispiel #48
0
print("Program Started")
from confluent_kafka import Consumer
from configFile import *
from FinalProducer import KafkaProducer
from URLReader import UrlReader
from Mongodb import MongoDb

producer = KafkaProducer()
url_reader = UrlReader()
mongodb = MongoDb()

c = Consumer(
    {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': GROUP_CONTENT_CONSUMER
    }
)  #, 'max.partition.fetch.bytes': 200000000, 'receive.message.max.bytes': 1000000000});
c.subscribe([UNPROCESSED_URL_TOPIC])

running = True
while running:
    print("Waiting for unprocessed url to be fetched")
    data = c.poll()
    if not data.error():
        url = data.value()
        print("Fetched url from Kafka - " + str(url))
        print("Checking if this url is already processed...")
        if mongodb.is_url_processed(url):
            print("Url already processed... Skipping it")
        else:
            print("Url not processed.... adding to the topic")
Beispiel #49
0
def test_consumer_rebalance_from_committed_offset(requires_kafka):
    consumer_group = "consumer-{}".format(uuid.uuid1().hex)
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             "{}".format(i).encode("utf8"),
                             partition=i % 2)

        assert producer.flush(
            5) == 0, "producer did not successfully flush queue"

        Consumer({
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "group.id": consumer_group
        }).commit(
            offsets=[
                TopicPartition(message.topic(), message.partition(),
                               message.offset() + 1)
                for message in messages_delivered[topic][:2]
            ],
            asynchronous=False,
        )

        consumer_a = SynchronizedConsumer(
            bootstrap_servers=os.environ["SENTRY_KAFKA_HOSTS"],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for i in xrange(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert (len(assignments_received[consumer_a]) == 1
                ), "expected to receive partition assignment"
        assert set(
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]) == set([(topic, 0),
                                                                  (topic, 1)])

        assignments_received[consumer_a].pop()

        consumer_b = SynchronizedConsumer(
            bootstrap_servers=os.environ["SENTRY_KAFKA_HOSTS"],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for i in xrange(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert (len(assignments_received[consumer]) == 1
                    ), "expected to receive partition assignment"
            assert (len(assignments_received[consumer][0]) == 1
                    ), "expected to have a single partition assignment"

            i = assignments_received[consumer][0][0]
            assignments[(i.topic, i.partition)] = consumer

        assert set(assignments.keys()) == set([(topic, 0), (topic, 1)])

        for expected_message in messages_delivered[topic][2:]:
            consumer = assignments[(expected_message.topic(),
                                    expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            producer.produce(
                commit_log_topic,
                key="{}:{}:{}".format(expected_message.topic(),
                                      expected_message.partition(),
                                      synchronize_commit_group).encode("utf8"),
                value="{}".format(expected_message.offset() +
                                  1).encode("utf8"),
            )

            assert producer.flush(
                5) == 0, "producer did not successfully flush queue"

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?
            for i in xrange(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, "no message received"

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
Example #50
def test_consumer_start_from_committed_offset(requires_kafka):
    consumer_group = "consumer-{}".format(uuid.uuid1().hex)
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, "{}".format(i).encode("utf8"))

        assert producer.flush(
            5) == 0, "producer did not successfully flush queue"

        Consumer({
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "group.id": consumer_group
        }).commit(message=messages_delivered[topic][0], asynchronous=False)

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            bootstrap_servers=os.environ["SENTRY_KAFKA_HOSTS"],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in xrange(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received
                   ) == 1, "expected to receive partition assignment"
        assert set((i.topic, i.partition)
                   for i in assignments_received[0]) == set([(topic, 0)])

        # TODO: Make sure that all partitions are paused on assignment.

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key="{}:{}:{}".format(message.topic(), message.partition(),
                                  synchronize_commit_group).encode("utf8"),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0 + 1]  # second message
        producer.produce(
            commit_log_topic,
            key="{}:{}:{}".format(message.topic(), message.partition(),
                                  synchronize_commit_group).encode("utf8"),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        assert producer.flush(
            5) == 0, "producer did not successfully flush queue"

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?
        for i in xrange(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = messages_delivered[topic][0 + 1]  # second message
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
from confluent_kafka import Consumer, KafkaError
import downloader

settings = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'mygroup',
    'client.id': 'client-1',
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    }
}

c = Consumer(settings)

c.subscribe(['downloadvideo'])

try:
    while True:
        msg = c.poll(0.1)
        if msg is None:
            continue
        elif not msg.error():
            # Decode the raw bytes and strip any stray quotes before handing
            # the URL off to the downloader.
            url = msg.value().decode('utf-8')
            url_formated = url.replace('"', '').replace("'", '').strip()
            print('Received message: {0}'.format(url))
            downloader.run(url_formated)
        elif msg.error().code() != KafkaError._PARTITION_EOF:
            print('Error occurred: {0}'.format(msg.error().str()))
except KeyboardInterrupt:
    pass
finally:
    c.close()
import logging

import confluent_kafka
from confluent_kafka import Consumer
from confluent_kafka.avro import AvroConsumer
from tornado import gen

logger = logging.getLogger(__name__)


class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            'bootstrap.servers': 'PLAINTEXT://localhost:9092',
            "default.topic.config": {
                "auto.offset.reset": "earliest"
            },
            "group.id": "0",
        }

        # TODO: Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        #
        #
        # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about
        # how the `on_assign` callback should be invoked.
        #
        #
        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # TODO: If the topic is configured to use `offset_earliest` set the partition offset to
        # the beginning or earliest
        for partition in partitions:
            if self.offset_earliest:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        #
        #
        # TODO: Poll Kafka for messages. Make sure to handle any errors or exceptions.
        # Additionally, make sure you return 1 when a message is processed, and 0 when no message
        # is retrieved.
        #
        #
        try:
            msg = self.consumer.poll(self.consume_timeout)
            if msg:
                err = msg.error()
                if not err:
                    self.message_handler(msg)
                    rc = 1
                else:
                    logger.error(err)
                    rc = 0
            else:
                logger.warning("no messages to consume")
                rc = 0
        except Exception as e:
            logger.error(e)
            rc = 0

        return rc

    def close(self):
        """Cleans up any open kafka consumers"""

        if self.consumer:
            self.consumer.close()
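# A minimal, hypothetical usage sketch for the KafkaConsumer wrapper above.
# The tornado IOLoop is assumed because the class awaits `gen.sleep`; the
# topic pattern and the print handler below are illustrative only.
if __name__ == "__main__":
    from tornado.ioloop import IOLoop

    def print_message(message):
        print(message.topic(), message.value())

    consumer = KafkaConsumer(
        "^com.example\\..*",  # regex pattern, as accepted by subscribe()
        print_message,
        is_avro=False,
        offset_earliest=True,
    )

    try:
        IOLoop.current().run_sync(consumer.consume)
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()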
import logging
import time

from confluent_kafka import Consumer, KafkaError


class ConfluentKafkaReader(object):
    def __init__(self, host, port, group, topic, buffer_size, reconnect_wait_time=2):
        """
        Initialize Kafka reader
        """
        logging.info("Initializing Confluent Kafka Consumer")
        self.host = host
        self.port = str(port)
        self.group = group
        self.topic = [topic]
        self.buffer_size = buffer_size
        self.reconnect_wait_time = reconnect_wait_time
        self.reconnect_retries = 0
        self.max_reconnect_retries = 10  # TODO: implement config parameter
        self.buffer = []

        # Initialized on read
        self.consumer = None

    def on_assign(self, consumer, partitions):
        # for p in partitions:
        #     p.offset = -2
        # consumer.assign(partitions)
        logging.debug('on_assignment callback...')
        logging.info('Assignment: %s', partitions)

    def _connect(self):
        connection = {
            'bootstrap.servers': self.host + ":" + self.port,
            'group.id': self.group,
            'session.timeout.ms': 6000,
            'default.topic.config': {'auto.offset.reset': 'largest'},
        }
        logging.info("Connecting to Kafka at %s...", connection)
        self.consumer = Consumer(**connection)
        self.consumer.subscribe(self.topic, on_assign=self.on_assign)

    def read(self):
        """
        Read up to ``buffer_size`` messages from Kafka. Reconnect on error.
        """
        try:
            self._connect()
            msgcn = 0
            while True:
                msg = self.consumer.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logging.debug('Catching KafkaError._PARTITION_EOF')
                        logging.error('%s [%d] reached end at offset %d with key %s',
                                      msg.topic(), msg.partition(), msg.offset(),
                                      str(msg.key()))
                        break
                    else:
                        # TODO: extend exception handling scope as we will end
                        # here for a lot of reasons!
                        logging.debug('Catching other errors...')
                        logging.error("Kafka error: %s.", msg.error())
                        logging.error("Trying to reconnect to %s:%s", self.host, self.port)
                        self.reconnect_retries += 1
                        time.sleep(self.reconnect_wait_time)
                        if self.reconnect_retries >= self.max_reconnect_retries:
                            logging.error("Max reconnection attempt limit reached (%d). Aborting",
                                          self.max_reconnect_retries)
                            break
                        else:
                            self.consumer.close()
                            self._connect()
                else:
                    # Proper message: buffer it, stripping the trailing newline
                    # so the writer does not add an extra one.
                    logging.debug('%s [%d] at offset %d with key %s',
                                  msg.topic(), msg.partition(), msg.offset(), str(msg.key()))
                    self.buffer.append(msg.value().rstrip('\n'))
                    msgcn += 1
                    if msgcn >= self.buffer_size:
                        logging.debug("Read buffer [%d] reached.", self.buffer_size)
                        break
        except KeyboardInterrupt:
            logging.info('Aborted by user\n')
        # Close down consumer to commit final offsets.
        self.consumer.close()
        return self.buffer
Example #54
# Where to start reading the topic: 'latest' to receive only new messages,
# or 'earliest' to pick up all messages that the consumer has missed.
# Using 'latest' means the consumer must be started before the producer.
read_topic_from = 'latest'

# How often to indicate data rate in seconds
throughput_debug_interval_in_sec = 1

###
### Consumer code
###

kbs_in_mb = 1000

c = Consumer({
    'bootstrap.servers': kafka_servers,
    'group.id': 'mygroup',
    'auto.offset.reset': read_topic_from
})

c.subscribe([topic_name])

kbs_so_far = 0

window_start_time = int(time.time())

while True:

    # Waits 1 second to receive a message, if it doesn't find one goes round the loop again
    msg = c.poll(1.0)

    if msg is None:
        continue

        if int(opt[1]) <= 0:
            sys.stderr.write("-T option value needs to be larger than zero: %s\n" % opt[1])
            sys.exit(1)

        conf['stats_cb'] = stats_cb
        conf['statistics.interval.ms'] = int(opt[1])

    # Create logger for consumer (logs will be emitted when poll() is called)
    logger = logging.getLogger('consumer')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)

    # Create Consumer instance
    # Hint: try debug='fetch' to generate some log messages
    c = Consumer(conf, logger=logger)

    def print_assignment(consumer, partitions):
        print('Assignment:', partitions)

    # Subscribe to topics
    c.subscribe(topics, on_assign=print_assignment)

    # Read messages from Kafka, print to stdout
    try:
        while True:
            msg = c.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())
Example #56
import logging

from confluent_kafka import Consumer, OFFSET_BEGINNING
from confluent_kafka.avro import AvroConsumer
from tornado import gen

logger = logging.getLogger(__name__)


class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        #
        # Configure the broker properties below.
        # Use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "group.id": "0",
        }

        # Create the proper Consumer, AvroConsumer or Consumer.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        # Configure the AvroConsumer and subscribe to the topics. on_assign is Callback function

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # If the consumer is configured with `offset_earliest`, start each
        # assigned partition from the beginning of the topic.

        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        #
        # Poll Kafka for messages, handling any errors, and hand successfully
        # received messages to the message handler. Return 1 when a message is
        # processed and 0 when no message is retrieved.

        message = self.consumer.poll(self.consume_timeout)

        if message is None:
            logger.debug("no message received by consumer")
            return 0
        elif message.error() is not None:
            logger.error("error from consumer: %s", message.error())
            return 0
        else:
            self.message_handler(message)
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""

        # Cleanup the kafka consumer
        self.consumer.close()
Example #57
def run_commit_log_consumer(bootstrap_servers, consumer_group, commit_log_topic,
                            partition_state_manager, synchronize_commit_group, start_event, stop_request_event):
    start_event.set()

    logging.debug('Starting commit log consumer...')

    positions = {}

    # NOTE: The commit log consumer group should not be persisted into the
    # ``__consumer_offsets`` topic since no offsets are committed by this
    # consumer. The group membership metadata messages will be published
    # initially but as long as this group remains a single consumer it will
    # be deleted after the consumer is closed.
    # It is very important to note that the ``group.id`` **MUST** be unique to
    # this consumer process!!! This ensures that it is able to consume from all
    # partitions of the commit log topic and get a comprehensive view of the
    # state of the consumer groups it is tracking.
    consumer = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': consumer_group,
        'enable.auto.commit': 'false',
        'enable.auto.offset.store': 'true',
        'enable.partition.eof': 'false',
        'default.topic.config': {
            'auto.offset.reset': 'error',
        },
    })

    def rewind_partitions_on_assignment(consumer, assignment):
        # The commit log consumer must start consuming from the beginning of
        # the commit log topic to ensure that it has a comprehensive view of
        # all active partitions.
        consumer.assign([
            TopicPartition(
                i.topic,
                i.partition,
                positions.get((i.topic, i.partition), OFFSET_BEGINNING),
            ) for i in assignment
        ])

    consumer.subscribe(
        [commit_log_topic],
        on_assign=rewind_partitions_on_assignment,
    )

    while not stop_request_event.is_set():
        message = consumer.poll(1)
        if message is None:
            continue

        error = message.error()
        if error is not None:
            raise Exception(error)

        positions[(message.topic(), message.partition())] = message.offset() + 1

        group, topic, partition, offset = get_commit_data(message)
        if group != synchronize_commit_group:
            logger.debug('Received consumer offsets update from %r, ignoring...', group)
            continue

        if offset in LOGICAL_OFFSETS:
            logger.debug(
                'Skipping invalid logical offset (%r) from %s/%s...',
                offset,
                topic,
                partition)
            continue
        elif offset < 0:
            logger.warning(
                'Received unexpected negative offset (%r) from %s/%s!',
                offset,
                topic,
                partition)

        partition_state_manager.set_remote_offset(topic, partition, offset)
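# A minimal sketch of the commit log record parsing that `get_commit_data`
# (not shown in this snippet) presumably performs, assuming the key/value
# layout produced in the tests earlier in this document: the key is
# "topic:partition:group" and the value is the committed offset as a decimal
# string. The function name below is hypothetical.
def parse_commit_log_message(message):
    topic, partition, group = message.key().decode("utf-8").split(":", 2)
    offset = int(message.value().decode("utf-8"))
    return group, topic, int(partition), offset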
Example #58
class SynchronizedConsumer(object):
    """
    This class implements the framework for a consumer that is intended to only
    consume messages that have already been consumed and committed by members
    of another consumer group.

    This works similarly to the Kafka built-in ``__consumer_offsets`` topic.
    The consumer group that is being "followed" (the one that must make
    progress for our consumer here to make progress, identified by the
    ``synchronize_commit_group`` constructor parameter/instance attribute) must
    report its offsets to a topic (identified by the ``commit_log_topic``
    constructor parameter/instance attribute). This consumer subscribes to both
    the commit log topic and the topic(s) that we are actually interested in
    consuming messages from. The messages received from the commit log topic
    control whether consumption from partitions belonging to the main topic is
    paused, resumed, or allowed to continue in its current state without
    changes.

    The furthest point in any partition that this consumer should ever consume
    to is the maximum offset that has been recorded to the commit log topic for
    that partition. If the offsets recorded to that topic move
    non-monotonically (due to an intentional offset rollback, for instance)
    this consumer *may* consume up to the highest watermark point. (The
    implementation here tries to pause consuming from the partition as soon as
    possible, but it makes no explicit guarantees about that behavior.)

    A short, hypothetical usage sketch follows this class definition.
    """
    initial_offset_reset_strategies = {
        'earliest': get_earliest_offset,
        'latest': get_latest_offset,
    }

    def __init__(self, bootstrap_servers, consumer_group, commit_log_topic,
                 synchronize_commit_group, initial_offset_reset='latest', on_commit=None):
        self.bootstrap_servers = bootstrap_servers
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[initial_offset_reset]

        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        self.__commit_log_consumer, self.__commit_log_consumer_stop_request = self.__start_commit_log_consumer()

        self.__positions = {}

        def commit_callback(error, partitions):
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = {
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.consumer_group,
            'enable.auto.commit': 'false',
            'enable.auto.offset.store': 'true',
            'enable.partition.eof': 'false',
            'default.topic.config': {
                'auto.offset.reset': 'error',
            },
            'on_commit': commit_callback,
        }

        self.__consumer = Consumer(consumer_configuration)

    def __start_commit_log_consumer(self, timeout=None):
        """
        Starts running the commit log consumer.
        """
        stop_request_event = threading.Event()
        start_event = threading.Event()
        result = execute(
            functools.partial(
                run_commit_log_consumer,
                bootstrap_servers=self.bootstrap_servers,
                consumer_group='{}:sync:{}'.format(self.consumer_group, uuid.uuid1().hex),
                commit_log_topic=self.commit_log_topic,
                synchronize_commit_group=self.synchronize_commit_group,
                partition_state_manager=self.__partition_state_manager,
                start_event=start_event,
                stop_request_event=stop_request_event,
            ),
        )
        start_event.wait(timeout)
        return result, stop_request_event

    def __check_commit_log_consumer_running(self):
        if not self.__commit_log_consumer.running():
            try:
                result = self.__commit_log_consumer.result(timeout=0)  # noqa
            except TimeoutError:
                pass  # not helpful

            raise Exception('Commit log consumer unexpectedly exited!')

    def __on_partition_state_change(
            self, topic, partition, previous_state_and_offsets, current_state_and_offsets):
        """
        Callback that is invoked when a partition state changes.
        """
        logger.debug('State change for %r: %r to %r', (topic, partition),
                     previous_state_and_offsets, current_state_and_offsets)

        current_state, current_offsets = current_state_and_offsets
        if current_offsets.local is None:
            # It only makes sense to manipulate the consumer if we've got an
            # assignment. (This block should only be entered at startup if the
            # remote offsets are retrieved from the commit log before the local
            # consumer has received its assignment.)
            return

        # TODO: This will be called from the commit log consumer thread, so need
        # to verify that calling the ``consumer.{pause,resume}`` methods is
        # thread safe!
        if current_state in (SynchronizedPartitionState.UNKNOWN, SynchronizedPartitionState.SYNCHRONIZED,
                             SynchronizedPartitionState.REMOTE_BEHIND):
            self.__consumer.pause([TopicPartition(topic, partition, current_offsets.local)])
        elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
            self.__consumer.resume([TopicPartition(topic, partition, current_offsets.local)])
        else:
            raise NotImplementedError('Unexpected partition state: %s' % (current_state,))

    def subscribe(self, topics, on_assign=None, on_revoke=None):
        """
        Subscribe to a topic.
        """
        self.__check_commit_log_consumer_running()

        def assignment_callback(consumer, assignment):
            # Since ``auto.offset.reset`` is set to ``error`` to force human
            # interaction on an offset reset, we have to explicitly specify the
            # starting offset if no offset has been committed for this topic during
            # the ``__consumer_offsets`` topic retention period.
            assignment = {
                (i.topic, i.partition): self.__positions.get((i.topic, i.partition)) for i in assignment
            }

            for i in self.__consumer.committed([TopicPartition(topic, partition) for (
                    topic, partition), offset in assignment.items() if offset is None]):
                k = (i.topic, i.partition)
                if i.offset > -1:
                    assignment[k] = i.offset
                else:
                    assignment[k] = self.initial_offset_reset(consumer, i.topic, i.partition)

            self.__consumer.assign([TopicPartition(topic, partition, offset)
                                    for (topic, partition), offset in assignment.items()])

            for (topic, partition), offset in assignment.items():
                # Setting the local offsets will either cause the partition to be
                # paused (if the remote offset is unknown or the local offset is
                # not trailing the remote offset) or resumed.
                self.__partition_state_manager.set_local_offset(topic, partition, offset)
                self.__positions[(topic, partition)] = offset

            if on_assign is not None:
                on_assign(self, [TopicPartition(topic, partition)
                                 for topic, partition in assignment.keys()])

        def revocation_callback(consumer, assignment):
            for item in assignment:
                # TODO: This should probably also be removed from the state manager.
                self.__positions.pop((item.topic, item.partition))

            if on_revoke is not None:
                on_revoke(self, assignment)

        self.__consumer.subscribe(
            topics,
            on_assign=assignment_callback,
            on_revoke=revocation_callback)

    def poll(self, timeout):
        self.__check_commit_log_consumer_running()

        message = self.__consumer.poll(timeout)
        if message is None:
            return

        if message.error() is not None:
            return message

        self.__partition_state_manager.validate_local_message(
            message.topic(), message.partition(), message.offset())
        self.__partition_state_manager.set_local_offset(
            message.topic(), message.partition(), message.offset() + 1)
        self.__positions[(message.topic(), message.partition())] = message.offset() + 1

        return message

    def commit(self, *args, **kwargs):
        self.__check_commit_log_consumer_running()

        return self.__consumer.commit(*args, **kwargs)

    def close(self):
        self.__check_commit_log_consumer_running()

        self.__commit_log_consumer_stop_request.set()
        try:
            self.__consumer.close()
        finally:
            self.__commit_log_consumer.result()
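# A minimal, hypothetical usage sketch for SynchronizedConsumer. The broker
# address, topics, and group names are illustrative; the commit log record
# format (key "topic:partition:group", value = committed offset) mirrors the
# one produced in the tests earlier in this document.
if __name__ == "__main__":
    consumer = SynchronizedConsumer(
        bootstrap_servers="localhost:9092",
        consumer_group="follower-group",
        commit_log_topic="commit-log",
        synchronize_commit_group="leader-group",
        initial_offset_reset="earliest",
    )
    consumer.subscribe(["events"])

    # poll() only releases messages up to the offsets that "leader-group" has
    # published to the commit log, so it returns None until the leader group
    # makes progress.
    while True:
        message = consumer.poll(1.0)
        if message is None:
            continue
        if message.error() is not None:
            raise Exception(message.error())
        print(message.topic(), message.partition(), message.offset())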
Example #59
def httpry_logs():
    consumer = Consumer({
        'bootstrap.servers': kafka_hosts,
        'group.id': 'Httpry_logs_%s' % dt,
        'default.topic.config': {
            'auto.offset.reset': 'latest',
            'auto.commit.enable': 'true',
        },
    })
    consumer.subscribe(['httpry_logs'])
    try:
        while True:
            msg = consumer.poll()
            if msg:
                if not msg.error():
                    Msg = msg.value().decode('utf-8').strip()
                    try:
                        tm = time.strftime('%Y%m%d%H%M', time.localtime())
                        httpry_Key = 'httpry_domain.%s' % tm
                        if Msg:
                            msg = Msg.split()
                            if len(msg) == 11:
                                if msg[6] != '-':
                                    RC.zincrby(httpry_Key,msg[6], 1)
                                    RC.expire(httpry_Key,600)
                    except Exception as e:
                        logging.error(e)
                        continue
                elif msg.error().code() != KafkaError._PARTITION_EOF:
                    logging.error(msg.error())
                    continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()
Example #60
def analytics_internet2_logs():
    consumer = Consumer({
        'bootstrap.servers': kafka_hosts,
        'group.id': 'Internet2_logs_%s' % dt,
        'default.topic.config': {
            'auto.offset.reset': 'latest',
            'auto.commit.enable': 'true',
        },
    })
    consumer.subscribe(['haproxy_logs'])
    try:
        while True:
            msg = consumer.poll()
            if not msg.error():
                Msg = msg.value().decode('utf-8').strip()
                try:
                    tt = time.strftime('%Y%m%d', time.localtime())
                    tm = time.strftime('%Y%m%d%H%M', time.localtime())
                    Tm = time.strftime('%H:%M', time.localtime())
                    Tra_ser_minute_Key = 'traffic.ser.%s' % tm
                    Tra_cli_minute_Key = 'traffic.cli.%s' % tm
                    if Msg:
                        Msg = Msg.split()
                        if len(Msg) >= 17:
                            traffic_cli = Msg[10]
                            traffic_ser = Msg[11]
                            Topic = str(Msg[14]).split('|')[0].replace('{', '').strip()
                            IP = str(Msg[5])
                            Rtime = Msg[8].split('/')[-1]
                            if Rtime.isdigit():
                                Rtime = int(Rtime)
                            else:
                                Rtime = 0
                            uv_key = 'baihe_uv_%s' % tt
                            Rt_Key = 'Rtime_%s_%s' % (tt, Topic)
                            PATH = str(Msg[16]).split('?')[0]
                            URL = 'http://%s%s' % (Topic,PATH)
                            Tra_ser_url_minute_Key = 'traffic.ser.url_%s' % Tm
                            Tra_cli_url_minute_Key = 'traffic.cli.url_%s' % Tm
                            for KEY in (uv_key,Rt_Key,Tra_ser_url_minute_Key,Tra_cli_url_minute_Key):
                                RC.expire(KEY,3600)
                            # Traffic
                            if traffic_ser.isdigit() and traffic_cli.isdigit():
                                RC.zincrby(Tra_cli_url_minute_Key, URL, int(traffic_cli))
                                RC.zincrby(Tra_ser_url_minute_Key,URL, int(traffic_ser))
                                # Real-time traffic
                                RC.zincrby(Tra_cli_minute_Key, Topic, int(traffic_cli))
                                RC.expire(Tra_cli_minute_Key, 300)
                                RC.zincrby(Tra_ser_minute_Key, Topic, int(traffic_ser))
                                RC.expire(Tra_ser_minute_Key, 300)
                            #
                            if Rtime:
                                RC.lpush(Rt_Key, Rtime)
                                RC.sadd(uv_key, IP)
                except Exception as e:
                    logging.error(e)
                    continue
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.error(msg.error())
                continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()