    def populate(self, auth_info, stmt_data, payload_sha2s):
        if self.__class__.__name__ == 'StatementManager':
            stmt_data['voided'] = False

        self.build_verb(stmt_data)
        self.build_statement_object(auth_info, stmt_data)
        stmt_data['actor'] = Agent.objects.retrieve_or_create(
            **stmt_data['actor'])[0]
        self.build_context(stmt_data)
        self.build_result(stmt_data)
        # A SubStatement may not have a timestamp
        if 'timestamp' in stmt_data:
            stmt_data['timestamp'] = convert_to_datetime_object(
                stmt_data['timestamp'])
        attachment_data = stmt_data.pop('attachments', None)

        if self.__class__.__name__ == 'StatementManager':
            # Save statement/substatement
            self.model_object = self.build_statement(auth_info, stmt_data)
        else:
            self.model_object = self.build_substatement(auth_info, stmt_data)
        try:
            kafka = SimpleClient(KAFKA_HOST)
            producer = SimpleProducer(kafka)
            producer.send_messages("StatementManager", str(self.model_object))
            kafka.close()
        except Exception:
            print('Error sending StatementManager message to Kafka')
        if attachment_data:
            self.build_attachments(auth_info, attachment_data, payload_sha2s)
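
For reference, a minimal sketch of a consumer for the messages this method publishes to the "StatementManager" topic; the host string and group name below are placeholders, not part of the original code:

from kafka import SimpleClient, SimpleConsumer

client = SimpleClient('localhost:9092')  # stand-in for KAFKA_HOST
consumer = SimpleConsumer(client, 'statement-debug', 'StatementManager',
                          auto_commit=False, iter_timeout=5)
for message in consumer:
    # each item is an OffsetAndMessage; the raw payload is message.message.value
    print(message.message.value)
consumer.stop()
client.close()
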
    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
Example #3
    def _get_highwater_offsets(self, kafka_hosts_ports):
        """
        Fetch highwater offsets for each topic/partition from Kafka cluster.

        Do this for all partitions in the cluster because even if it has no
        consumers, we may want to measure whether producers are successfully
        producing. No need to limit this for performance because fetching broker
        offsets from Kafka is a relatively inexpensive operation.
        """
        kafka_conn = SimpleClient(kafka_hosts_ports,
                                  timeout=self.kafka_timeout)
        try:
            broker_topics_partitions = kafka_conn.topics_to_brokers.keys()
            # batch a bunch of requests into a single network call
            offsets_request = [
                OffsetRequestPayload(topic, partition, -1, 1)
                for topic, partition in broker_topics_partitions
            ]
            offsets_response = kafka_conn.send_offset_request(offsets_request)
            highwater_offsets = {(x.topic, x.partition): x.offsets[0]
                                 for x in offsets_response}
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')
        return highwater_offsets
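
A hedged usage sketch building on the docstring above: per-partition consumer lag is the highwater offset minus the committed consumer offset. Here `check` stands for whatever object defines _get_highwater_offsets, and consumer_offsets is a hypothetical {(topic, partition): committed_offset} dict obtained elsewhere:

highwater = check._get_highwater_offsets('localhost:9092')
for (topic, partition), hw in highwater.items():
    # committed offsets come from some other source (hypothetical here)
    lag = hw - consumer_offsets.get((topic, partition), 0)
    print('%s[%d] lag: %d' % (topic, partition, lag))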
Example #5
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))
            self.client_async = KafkaClient(
                bootstrap_servers='%s:%d' %
                (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #6
def send_to_kafka(message):
    producer = get_producer()
    try:
        producer.send(settings.KAFKA_TOPIC, message)
    except Exception:
        client = SimpleClient(hosts=settings.KAFKA_SERVERS)
        client.ensure_topic_exists(settings.KAFKA_TOPIC)
        client.close()
        producer.send(settings.KAFKA_TOPIC, message)
    producer.close(10)
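
get_producer() is not defined in this snippet; a minimal sketch of what it might look like with kafka-python's KafkaProducer, assuming the same Django-style settings module (the real helper may cache or configure the producer differently):

from kafka import KafkaProducer
from django.conf import settings

def get_producer():
    # Hypothetical helper; matches the producer.send()/close(10) calls above.
    return KafkaProducer(bootstrap_servers=settings.KAFKA_SERVERS)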
Example #7
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
    def getTopics(self, once_sleep=60):
        '''
        Get the topics to consume; the set can be controlled via a database table.
        :param once_sleep: refresh interval (seconds) for the topic list
        :return:
        '''
        while True:
            if self.debug:
                debug_topic = self.configures.get("debugconf", "debug_topic")
                self.topics = [(item, "debug")
                               for item in debug_topic.split(",")]
            else:
                kafka_topics = set()
                saas_appkey = set()
                appkey_logpath = {}
                try:
                    from kafka import SimpleClient
                    hostname = self.configures.get("kafka", "hostname")
                    client = SimpleClient(hosts=hostname)
                    for topic in client.topics:
                        kafka_topics.add(topic)
                    client.close()
                    log.info("get kafka topics: %s" %
                             json.dumps(list(kafka_topics)))
                except Exception:
                    logging.error(sys.exc_info())
                    continue

                try:
                    client = MysqlClient("saas_server")
                    topics = client.getTopics(group_id=self.group_id)
                    for topic, logpath in topics:
                        saas_appkey.add(topic)
                        appkey_logpath.setdefault(topic, set()).add(logpath)
                    client.closeMysql()
                    log.info("get mysql appkeys: %s" %
                             json.dumps(list(saas_appkey)))
                except Exception:
                    logging.error(sys.exc_info())
                    continue
                self.topics = [(topic, logpath)
                               for topic in list(kafka_topics & saas_appkey)
                               for logpath in appkey_logpath[topic]]
            log.info("current topics: %s" % json.dumps(self.topics))
            time.sleep(once_sleep)
class KafkaClient(object):
    def __init__(self, kafka_host, topic, group_id):
        self.client = SimpleClient(kafka_host)
        self.topic = topic
        try:
            self.partitions = self.client.topic_partitions[topic]
        except KeyError as ex:
            print('KeyError: {}'.format(ex))
            self.partitions = None
        self.group_id = group_id

    def close(self):
        self.client.close()

    def get_latest_offsets(self):
        request = [OffsetRequestPayload(self.topic, p, -1, 1)
                   for p in self.partitions.keys()]
        response = self.client.send_offset_request(request)
        offsets = {r.partition: r.offsets[0] for r in response}  # build dictionary
        return offsets
Example #10
class KafkaClient(object):
    def __init__(self, kafka_host, topic, group_id):
        self.client = SimpleClient(kafka_host)
        self.topic = topic
        self.partitions = self.client.topic_partitions[topic]
        self.group_id = group_id

    def close(self):
        self.client.close()

    def get_tail_offsets(self):
        request = [
            OffsetRequestPayload(self.topic, p, -1, 1)
            for p in self.partitions.keys()
        ]
        response = self.client.send_offset_request(request)
        offsets = {r.partition: r.offsets[0]
                   for r in response}  # build dictionary
        return offsets
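
A brief usage sketch for this class; the host, topic, and group id are placeholders:

client = KafkaClient('localhost:9092', 'my-topic', 'my-group')
print(client.get_tail_offsets())  # e.g. {0: 1042, 1: 998}
client.close()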
Example #11
def topic_offsets(kafka_brokers, topic):
    client = SimpleClient(insure_is_array(kafka_brokers))
    topic_partitions = client.topic_partitions
    if topic not in topic_partitions:
        raise KafkaException("topic {} doesn't exist".format(topic))
    partitions = topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]
    offsets_responses = client.send_offset_request(offset_requests)
    client.close()
    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic == topic:
            topic_offset = 0
            topic_partition = TopicPartition(topic=offset.topic,
                                             partition=offset.partition)
            if offset.offsets[0]:
                topic_offset = offset.offsets[0]
            partitions_and_offsets[topic_partition] = topic_offset

    return partitions_and_offsets
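
A hypothetical call site; the broker list and topic name are placeholders:

for tp, offset in topic_offsets(['localhost:9092'], 'my-topic').items():
    print(tp.topic, tp.partition, offset)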
Example #12
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')


        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError('Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic,
                                                                             partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #13
# -*- coding: utf-8 -*-
from kafka import SimpleClient, SimpleProducer, KafkaConsumer

kafka = SimpleClient(
    "192.168.6.51,192.168.6.52,192.168.6.53,192.168.6.54,192.168.6.55")
producer = SimpleProducer(kafka)
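# Note: as pasted, this example never publishes anything; a send would go
# here, before the client is closed, e.g. (topic name is a placeholder):
# producer.send_messages("my-topic", b"hello")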

kafka.close()
Example #14
def ensure_topic():
    client = SimpleClient(hosts=KAFKA_SERVERS)
    client.ensure_topic_exists(KAFKA_TOPIC)
    client.close()
    def populate(self, data):
        activity_id = data['id']
        can_define = False
        # Try to get the activity
        try:
            self.activity = Activity.objects.get(activity_id=activity_id)
            act_created = False
        # Activity does not exist
        except Activity.DoesNotExist:
            # If the activity does not exist and the user can define it,
            # create the activity with authority
            if self.define_permission:
                can_define = True
                try:
                    # Use get_or_create inside try to guard against races
                    self.activity, act_created = Activity.objects.get_or_create(
                        activity_id=activity_id, authority=self.auth)
                except IntegrityError:
                    self.activity = Activity.objects.get(
                        activity_id=activity_id)
                    act_created = False
            # If the activity does not exist and the user cannot define it,
            # create the activity without authority
            else:
                try:
                    # Use get_or_create inside try to guard against races
                    self.activity, act_created = Activity.objects.get_or_create(
                        activity_id=activity_id)
                except IntegrityError:
                    self.activity = Activity.objects.get(
                        activity_id=activity_id)
                    act_created = False
            # If the retrieved activity has no authority but the user has
            # define permission, the user becomes the authority over it
            if not act_created and can_define and not self.activity.authority:
                self.activity.authority = self.auth
        # Activity already exists
        else:
            # Activity already exists and the user has define permission
            if self.define_permission:
                # The activity exists but was created by someone without define
                # permission, so it is up for grabs for the first user with
                # define permission, or...
                # it exists with an authority - keep can_define only if the
                # authorities match (directly or through group membership)
                if (not self.activity.authority) or \
                   (self.activity.authority == self.auth) or \
                   (self.activity.authority.objectType == 'Group' and self.auth in self.activity.authority.member.all()) or \
                   (self.auth.objectType == 'Group' and self.activity.authority in self.auth.member.all()):
                    can_define = True
                else:
                    can_define = False
            # Activity already exists but the user lacks define permission
            else:
                can_define = False
        # Set id and objectType regardless
        self.activity.canonical_data['id'] = activity_id
        self.activity.canonical_data['objectType'] = 'Activity'
        incoming_act_def = data.get('definition', None)
        # If the activity existed and the user has define privileges, update
        # the activity
        if can_define and not act_created:
            self.update_language_maps(incoming_act_def)
        # If the activity was created and the user has define privileges
        elif can_define and act_created:
            # If there is an incoming definition
            if incoming_act_def:
                self.activity.canonical_data['definition'] = incoming_act_def
        self.activity.save()
        try:
            kafka = SimpleClient(KAFKA_HOST)
            producer = SimpleProducer(kafka)
            producer.send_messages("ActivityManager",
                                   self.activity.__unicode__())
            kafka.close()
        except Exception:
            print('Error sending ActivityManager message to Kafka')
Example #16
    def spoorer(self):  # Connect to Kafka and fetch the topic list
        try:
            kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
            # print(kafka_client.topics)
        except Exception:
            print("Error, cannot connect to Kafka broker.")
            sys.exit(1)
        else:
            kafka_topics = kafka_client.topics
        finally:
            kafka_client.close()

        # Connect to ZooKeeper and fetch the current consumer offsets
        try:
            zookeeper_client = KazooClient(hosts=self.zookeeper_hosts, read_only=True, timeout=self.timeout)
            zookeeper_client.start()
        except Exception:
            print("Error, cannot connect to ZooKeeper server.")
            sys.exit(1)

        try:
            groups = map(str, zookeeper_client.get_children(self.zookeeper_url + 'consumers'))
        except NoNodeError:
            print("Error, invalid ZooKeeper url.")
            zookeeper_client.stop()
            sys.exit(2)
        else:
            for group in groups:
                if 'offsets' not in zookeeper_client.get_children(self.zookeeper_url + 'consumers/%s' % group):
                    continue
                topic_path = 'consumers/%s/offsets' % group
                topics = map(str, zookeeper_client.get_children(self.zookeeper_url + topic_path))
                if len(topics) == 0:
                    continue
                for topic in topics:
                    # print topic
                    # print self.white_topic_group.keys()
                    if topic not in self.white_topic_group.keys():
                        continue
                    # elif group not in self.white_topic_group[topic].replace(' ','').split(','):
                    #     continue
                    partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                    partitions = map(int, zookeeper_client.get_children(self.zookeeper_url + partition_path))

                    for partition in partitions:
                        base_path = 'consumers/%s/%s/%s/%s' % (group, '%s', topic, partition)
                        owner_path, offset_path = base_path % 'owners', base_path % 'offsets'
                        offset = zookeeper_client.get(self.zookeeper_url + offset_path)[0]

                        try:
                            owner = zookeeper_client.get(self.zookeeper_url + owner_path)[0]
                        except NoNodeError:
                            owner = 'null'
                        # Store each consumer's progress in the metric dict
                        metric = {'datetime': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                                  'topic': topic, 'group': group, 'partition': int(partition),
                                  'logsize': None, 'offset': int(offset), 'lag': None, 'owner': owner}
                        self.result.append(metric)
        finally:
            zookeeper_client.stop()
        # Fetch the log size (highwater offset) of each partition
        try:
            client = SimpleClient(self.kafka_hosts)
        except Exception:
            print("Error, cannot connect to Kafka broker.")
            sys.exit(1)
        else:
            for kafka_topic in kafka_topics:
                self.kafka_logsize[kafka_topic] = {}
                partitions = client.topic_partitions[kafka_topic]
                offset_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1) for p in partitions.keys()]
                offsets_responses = client.send_offset_request(offset_requests)
                for r in offsets_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]

            # lag equals logsize minus the current offset
        f1 = open(self.log_file, 'w')
        # f2 = open(self.log_day_file, 'a')
        # print(self.result)
        for metric in self.result:
            logsize = self.kafka_logsize[metric['topic']][metric['partition']]
            metric['logsize'] = int(logsize)
            metric['lag'] = int(logsize) - int(metric['offset'])
            f1.write(json.dumps(metric, sort_keys=True) + '\n')
            f1.flush()
            # f2.write(json.dumps(metric,sort_keys=True) + '\n')
            # f2.flush()
        # finally:
        f1.close()
        client.close()
Example #17
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(
                    self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError(
                            'Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
class TestFailover(KafkaIntegrationTestCase):
    create_client = False

    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_kwargs = {
            'zk_chroot': zk_chroot,
            'replicas': replicas,
            'partitions': partitions
        }
        self.brokers = [
            KafkaFixture.instance(i, self.zk, **kk_kwargs)
            for i in range(replicas)
        ]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()

    def tearDown(self):
        super(TestFailover, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        self.client.close()
        for broker in self.brokers:
            broker.close()
        self.zk.close()

    def test_switch_leader(self):
        topic = self.topic
        partition = 0

        # Testing the base Producer class here so that we can easily send
        # messages to a specific partition, kill the leader for that partition
        # and check that after another broker takes leadership the producer
        # is able to resume sending messages

        # require that the server commit messages to all in-sync replicas
        # so that failover doesn't lose any messages on server-side
        # and we can assert that server-side message count equals client-side
        producer = Producer(self.client,
                            async_send=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)

        # kill leader for partition
        self._kill_leader(topic, partition)

        # expect failure, but don't wait more than 60 secs to recover
        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                log.debug(
                    "attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
                log.debug("success!")
                recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages to new leader
        self._send_random_messages(producer, topic, partition, 100)

        # count number of messages
        # Should be equal to 100 before + 1 recovery + 100 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic,
                                  201,
                                  partitions=(partition, ),
                                  at_least=True)

    def test_switch_leader_async(self):
        topic = self.topic
        partition = 0

        # Test the base class Producer -- send_messages to a specific partition
        producer = Producer(self.client,
                            async_send=True,
                            batch_send_every_n=15,
                            batch_send_every_t=3,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
                            async_log_messages_on_error=False)

        # Send 10 random messages
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # kill leader for partition
        self._kill_leader(topic, partition)

        log.debug("attempting to send 'success' message after leader killed")

        # in async mode, this should return immediately
        producer.send_messages(topic, partition, b'success')
        producer.send_messages(topic, partition + 1, b'success')

        # send to new leader
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # Stop the producer and wait for it to shutdown
        producer.stop()
        started = time.time()
        timeout = 60
        while (time.time() - started) < timeout:
            if not producer.thread.is_alive():
                break
            time.sleep(0.1)
        else:
            self.fail('timeout waiting for producer queue to empty')

        # count number of messages
        # Should be equal to 10 before + 1 recovery + 10 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic,
                                  21,
                                  partitions=(partition, ),
                                  at_least=True)
        self.assert_message_count(topic,
                                  21,
                                  partitions=(partition + 1, ),
                                  at_least=True)

    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async_send=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

    def test_switch_leader_simple_consumer(self):
        producer = Producer(self.client, async_send=False)
        consumer = SimpleConsumer(self.client,
                                  None,
                                  self.topic,
                                  partitions=None,
                                  auto_commit=False,
                                  iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic,
                      partition)
            while True:
                try:
                    producer.send_messages(topic, partition,
                                           msg.encode('utf-8'))
                except Exception:
                    log.exception(
                        'failure in _send_random_messages - retrying')
                    continue
                else:
                    break

    def _kill_leader(self, topic, partition):
        leader = self.client.topics_to_brokers[TopicPartition(
            topic, partition)]
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

    def assert_message_count(self,
                             topic,
                             check_count,
                             timeout=10,
                             partitions=None,
                             at_least=False):
        hosts = ','.join(
            ['%s:%d' % (broker.host, broker.port) for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client,
                                  None,
                                  topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
Example #19
from kafka import SimpleProducer, SimpleClient, SimpleConsumer

# To consume messages
client = SimpleClient('localhost:9092')
consumer = SimpleConsumer(client, "my-group", "my-topic")
for message in consumer:
    # message is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.decode('utf-8')`
    print(message)

# Use multiprocessing for parallel consumers
from kafka import MultiProcessConsumer

# This will split the number of partitions among two processes
consumer = MultiProcessConsumer(client, "my-group", "my-topic", num_procs=2)

# This will spawn processes such that each handles 2 partitions max
consumer = MultiProcessConsumer(client,
                                "my-group",
                                "my-topic",
                                partitions_per_proc=2)

for message in consumer:
    print(message)

for message in consumer.get_messages(count=5, block=True, timeout=4):
    print(message)

client.close()
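
The snippet above imports SimpleProducer but never uses it; a matching producer sketch (topic name and payloads are placeholders):

from kafka import SimpleClient, SimpleProducer

client = SimpleClient('localhost:9092')
producer = SimpleProducer(client)
producer.send_messages('my-topic', b'some message', b'another message')
producer.stop()
client.close()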
Example #20
    def spoorer(self):
        try:
            kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)
        else:
            kafka_topics = kafka_client.topics
        finally:
            kafka_client.close()

        try:
            zookeeper_client = KazooClient(hosts=self.zookeeper_hosts,
                                           read_only=True,
                                           timeout=self.timeout)
            zookeeper_client.start()
        except Exception as e:
            print "Error, cannot connect zookeeper server."
            sys.exit(1)

        try:
            groups = map(
                str,
                zookeeper_client.get_children(self.zookeeper_url +
                                              'consumers'))
        except NoNodeError as e:
            print "Error, invalid zookeeper url."
            zookeeper_client.stop()
            sys.exit(2)
        else:
            for group in groups:
                if 'offsets' not in zookeeper_client.get_children(
                        self.zookeeper_url + 'consumers/%s' % group):
                    continue
                topic_path = 'consumers/%s/offsets' % (group)
                topics = map(
                    str,
                    zookeeper_client.get_children(self.zookeeper_url +
                                                  topic_path))
                if len(topics) == 0:
                    continue

                for topic in topics:
                    if topic not in self.white_topic_group.keys():
                        continue
                    elif group not in self.white_topic_group[topic].replace(
                            ' ', '').split(','):
                        continue
                    partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                    partitions = map(
                        int,
                        zookeeper_client.get_children(self.zookeeper_url +
                                                      partition_path))

                    for partition in partitions:
                        base_path = 'consumers/%s/%s/%s/%s' % (
                            group, '%s', topic, partition)
                        owner_path, offset_path = base_path % 'owners', base_path % 'offsets'
                        offset = zookeeper_client.get(self.zookeeper_url +
                                                      offset_path)[0]
                        try:
                            owner = zookeeper_client.get(self.zookeeper_url +
                                                         owner_path)[0]
                        except NoNodeError as e:
                            owner = 'null'

                        metric = {
                            'datetime': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                            'topic': topic,
                            'group': group,
                            'partition': int(partition),
                            'logsize': None,
                            'offset': int(offset),
                            'lag': None,
                            'owner': owner
                        }
                        self.result.append(metric)
        finally:
            zookeeper_client.stop()

        try:
            kafka_consumer = KafkaConsumer(bootstrap_servers=self.kafka_hosts)
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)
        else:
            for kafka_topic in kafka_topics:
                self.kafka_logsize[kafka_topic] = {}
                partitions = kafka_client.get_partition_ids_for_topic(
                    kafka_topic)

                for partition in partitions:
                    offset = kafka_consumer.get_partition_offsets(
                        kafka_topic, partition, -1, 1)[0]
                    self.kafka_logsize[kafka_topic][partition] = offset

            f1 = open(self.log_file, 'w')
            f2 = open(self.log_day_file, 'a')

            for metric in self.result:
                logsize = self.kafka_logsize[metric['topic']][
                    metric['partition']]
                metric['logsize'] = int(logsize)
                metric['lag'] = int(logsize) - int(metric['offset'])

                f1.write(json.dumps(metric, sort_keys=True) + '\n')
                f1.flush()
                f2.write(json.dumps(metric, sort_keys=True) + '\n')
                f2.flush()
        finally:
            kafka_consumer.close()

        return ''
class TestFailover(KafkaIntegrationTestCase):
    create_client = False

    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_args = [self.zk.host, self.zk.port]
        kk_kwargs = {'zk_chroot': zk_chroot, 'replicas': replicas,
                     'partitions': partitions}
        self.brokers = [KafkaFixture.instance(i, *kk_args, **kk_kwargs)
                        for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()

    def tearDown(self):
        super(TestFailover, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        self.client.close()
        for broker in self.brokers:
            broker.close()
        self.zk.close()

    def test_switch_leader(self):
        topic = self.topic
        partition = 0

        # Testing the base Producer class here so that we can easily send
        # messages to a specific partition, kill the leader for that partition
        # and check that after another broker takes leadership the producer
        # is able to resume sending messages

        # require that the server commit messages to all in-sync replicas
        # so that failover doesn't lose any messages on server-side
        # and we can assert that server-side message count equals client-side
        producer = Producer(self.client, async_send=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)

        # kill leader for partition
        self._kill_leader(topic, partition)

        # expect failure, but don't wait more than 60 secs to recover
        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                log.debug("attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
                log.debug("success!")
                recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages to new leader
        self._send_random_messages(producer, topic, partition, 100)

        # count number of messages
        # Should be equal to 100 before + 1 recovery + 100 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic, 201, partitions=(partition,),
                                  at_least=True)

    def test_switch_leader_async(self):
        topic = self.topic
        partition = 0

        # Test the base class Producer -- send_messages to a specific partition
        producer = Producer(self.client, async_send=True,
                            batch_send_every_n=15,
                            batch_send_every_t=3,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
                            async_log_messages_on_error=False)

        # Send 10 random messages
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # kill leader for partition
        self._kill_leader(topic, partition)

        log.debug("attempting to send 'success' message after leader killed")

        # in async mode, this should return immediately
        producer.send_messages(topic, partition, b'success')
        producer.send_messages(topic, partition + 1, b'success')

        # send to new leader
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # Stop the producer and wait for it to shutdown
        producer.stop()
        started = time.time()
        timeout = 60
        while (time.time() - started) < timeout:
            if not producer.thread.is_alive():
                break
            time.sleep(0.1)
        else:
            self.fail('timeout waiting for producer queue to empty')

        # count number of messages
        # Should be equal to 10 before + 1 recovery + 10 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic, 21, partitions=(partition,),
                                  at_least=True)
        self.assert_message_count(topic, 21, partitions=(partition + 1,),
                                  at_least=True)

    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async_send=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

    def test_switch_leader_simple_consumer(self):
        producer = Producer(self.client, async_send=False)
        consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
            while True:
                try:
                    producer.send_messages(topic, partition, msg.encode('utf-8'))
                except Exception:
                    log.exception('failure in _send_random_messages - retrying')
                    continue
                else:
                    break

    def _kill_leader(self, topic, partition):
        leader = self.client.topics_to_brokers[TopicPartition(topic, partition)]
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True