def test_produce_consume(self):
    # Round-trip check: for each partition, send two messages, assert that
    # self.server.wait_for() reports the "Created log" and "Flushing log"
    # lines, then fetch from offset 0 and compare what comes back with what
    # was sent.
    topic = "test-produce-consume"
    cases = (
        (0, ("testing 1", "testing 2"),
         "Created log for 'test-produce-consume'-0",
         "Flushing log 'test-produce-consume-0'"),
        # Do the same, but for a different partition
        (1, ("testing 3", "testing 4"),
         "Created log for 'test-produce-consume'-1",
         "Flushing log 'test-produce-consume-1'"),
    )
    for partition, payloads, created_line, flushed_line in cases:
        sent = [KafkaClient.create_message(payload) for payload in payloads]
        self.kafka.send_message_set(ProduceRequest(topic, partition, sent))
        self.assertTrue(self.server.wait_for(created_line))
        self.assertTrue(self.server.wait_for(flushed_line))

        # get_message_set() also returns a follow-up request; it is not
        # needed here.
        fetch = FetchRequest(topic, partition, 0, 1024)
        (received, _next_req) = self.kafka.get_message_set(fetch)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(received), 2)
        self.assertEqual(received[0], sent[0])
        self.assertEqual(received[1], sent[1])
def test_check_offset(self):
    # Produce/consume one message at a time and check that the follow-up
    # request returned by get_message_set() carries an offset equal to the
    # total encoded size of the messages consumed so far.
    message1 = KafkaClient.create_message("testing 1")
    req = ProduceRequest("test-check-offset", 0, [message1])
    self.kafka.send_message_set(req)
    self.assertTrue(
        self.server.wait_for("Created log for 'test-check-offset'-0"))
    self.assertTrue(
        self.server.wait_for("Flushing log 'test-check-offset-0'"))

    req = FetchRequest("test-check-offset", 0, 0, 1024)
    (messages, next_req) = self.kafka.get_message_set(req)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0], message1)
    self.assertEqual(next_req.offset,
                     len(KafkaClient.encode_message(message1)))

    # Produce another message, consume with the last offset
    message2 = KafkaClient.create_message("test 2")
    req = ProduceRequest("test-check-offset", 0, [message2])
    self.kafka.send_message_set(req)
    self.assertTrue(
        self.server.wait_for("Flushing log 'test-check-offset-0'"))

    # Verify: only the new message is returned and the offset advanced by
    # its encoded length.
    (messages, next_req) = self.kafka.get_message_set(next_req)
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0], message2)
    self.assertEqual(
        next_req.offset,
        len(KafkaClient.encode_message(message1)) +
        len(KafkaClient.encode_message(message2)))
def test_check_offset(self):
    # Checks offset bookkeeping: after each fetch, the request handed back by
    # get_message_set() must point just past the bytes already consumed.
    topic = "test-check-offset"

    # First message: creates the log and is read back from offset 0.
    message1 = KafkaClient.create_message("testing 1")
    self.kafka.send_message_set(ProduceRequest(topic, 0, [message1]))
    self.assertTrue(self.server.wait_for("Created log for 'test-check-offset'-0"))
    self.assertTrue(self.server.wait_for("Flushing log 'test-check-offset-0'"))

    (messages, next_req) = self.kafka.get_message_set(
        FetchRequest(topic, 0, 0, 1024))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0], message1)
    self.assertEqual(next_req.offset, len(KafkaClient.encode_message(message1)))

    # Second message: consumed using the request returned by the first fetch.
    message2 = KafkaClient.create_message("test 2")
    self.kafka.send_message_set(ProduceRequest(topic, 0, [message2]))
    self.assertTrue(self.server.wait_for("Flushing log 'test-check-offset-0'"))

    (messages, next_req) = self.kafka.get_message_set(next_req)
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0], message2)
    expected_offset = (len(KafkaClient.encode_message(message1)) +
                       len(KafkaClient.encode_message(message2)))
    self.assertEqual(next_req.offset, expected_offset)
def test_produce(self):
    # Produce one message per partition and assert that
    # self.server.wait_for() reports the matching "Created log" line.
    expectations = (
        (0, "Created log for 'test-produce'-0"),
        # Same thing, different partition
        (1, "Created log for 'test-produce'-1"),
    )
    for partition, created_line in expectations:
        payload = KafkaClient.create_message("testing")
        request = ProduceRequest("test-produce", partition, [payload])
        self.kafka.send_message_set(request)
        self.assertTrue(self.server.wait_for(created_line))
def test_produce(self):
    # Sends a single "testing" message to partitions 0 and 1 of the
    # "test-produce" topic; after each send, self.server.wait_for() must
    # report that partition's "Created log" line.
    topic = "test-produce"

    first = ProduceRequest(topic, 0, [KafkaClient.create_message("testing")])
    self.kafka.send_message_set(first)
    log_created = self.server.wait_for("Created log for 'test-produce'-0")
    self.assertTrue(log_created)

    second = ProduceRequest(topic, 1, [KafkaClient.create_message("testing")])
    self.kafka.send_message_set(second)
    log_created = self.server.wait_for("Created log for 'test-produce'-1")
    self.assertTrue(log_created)
def test_message_list(self):
    # Encode three messages as one message set, compare against the known
    # wire bytes, then decode the bytes and check the payloads survive.
    payloads = ["one", "two", "three"]
    msgs = [KafkaClient.create_message(payload) for payload in payloads]
    enc = KafkaClient.encode_message_set(msgs)
    expect = ("\x00\x00\x00\t\x01\x00zl\x86\xf1one\x00\x00\x00\t\x01\x00\x11"
              "\xca\x8aftwo\x00\x00\x00\x0b\x01\x00F\xc5\xd8\xf5three")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(enc, expect)

    # The second element returned by read_message_set() is not checked here.
    (messages, _read) = KafkaClient.read_message_set(enc)
    self.assertEqual(len(messages), 3)
    self.assertEqual(messages[0].payload, "one")
    self.assertEqual(messages[1].payload, "two")
    self.assertEqual(messages[2].payload, "three")
def test_iterator(self):
    # Produce 100 messages
    messages = [
        KafkaClient.create_message("testing %d" % i) for i in range(100)
    ]
    req = ProduceRequest("test-iterator", 0, messages)
    self.kafka.send_message_set(req)
    self.assertTrue(
        self.server.wait_for("Created log for 'test-iterator'-0"))
    self.assertTrue(self.server.wait_for("Flushing log 'test-iterator-0'"))

    # Initialize an iterator of fetch size 64 bytes - big enough for one
    # message but not enough for all 100 messages
    cnt = 0
    for i, msg in enumerate(
            self.kafka.iter_messages("test-iterator", 0, 0, 64)):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(messages[i], msg)
        cnt += 1
    self.assertEqual(cnt, 100)

    # Same thing, but don't auto paginate: the iterator must stop before
    # all 100 messages have been yielded.
    cnt = 0
    for i, msg in enumerate(
            self.kafka.iter_messages("test-iterator", 0, 0, 64, False)):
        self.assertEqual(messages[i], msg)
        cnt += 1
    self.assertTrue(cnt < 100)
def run(self):
    """Buffer payloads from ``self.in_queue`` and publish them in batches.

    Waits on ``self.barrier`` before starting, then loops until
    ``self.barrier.is_set()`` becomes false. A batch is sent when more than
    ``self.producer_flush_buffer`` messages are buffered, or when more than
    ``self.producer_flush_timeout`` seconds have passed since the last
    flush. On shutdown the remaining messages are sent and the client is
    closed.
    """
    self.barrier.wait()
    log.info("Starting %s" % self)
    messages = []
    last_produce = time.time()

    def flush(messages):
        # Nothing buffered (e.g. an idle timeout): skip the request instead
        # of sending an empty message set.
        if not messages:
            return
        # NOTE(review): partition -1 is passed through unchanged; its
        # meaning is defined by the client/broker -- confirm.
        self.client.send_message_set(ProduceRequest(self.topic, -1, messages))
        # Clear in place so the caller's list object is emptied too.
        del messages[:]

    while True:
        if not self.barrier.is_set():
            log.info("Shutdown %s, flushing messages" % self)
            try:
                flush(messages)
            finally:
                # Release the connection even if the final flush fails.
                self.client.close()
            break

        if len(messages) > self.producer_flush_buffer:
            log.debug("Message count threshold reached. Flushing messages")
            flush(messages)
            last_produce = time.time()
        elif (time.time() - last_produce) > self.producer_flush_timeout:
            log.debug("Producer timeout reached. Flushing messages")
            flush(messages)
            last_produce = time.time()

        try:
            # Block for at most producer_timeout seconds so the shutdown and
            # timeout checks above are re-evaluated regularly.
            payload = self.in_queue.get(True, self.producer_timeout)
        except Empty:
            continue
        messages.append(KafkaClient.create_message(payload))
def test_message_list(self):
    # A message set built from "one", "two" and "three" must encode to the
    # fixed byte string below and decode back to the same three payloads.
    msgs = [
        KafkaClient.create_message("one"),
        KafkaClient.create_message("two"),
        KafkaClient.create_message("three"),
    ]
    enc = KafkaClient.encode_message_set(msgs)
    expect = (
        "\x00\x00\x00\t\x01\x00zl\x86\xf1one\x00\x00\x00\t\x01\x00\x11"
        "\xca\x8aftwo\x00\x00\x00\x0b\x01\x00F\xc5\xd8\xf5three")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(enc, expect)

    decoded = KafkaClient.read_message_set(enc)
    messages = decoded[0]  # decoded[1] is returned alongside but unused here
    self.assertEqual(len(messages), 3)
    self.assertEqual(messages[0].payload, "one")
    self.assertEqual(messages[1].payload, "two")
    self.assertEqual(messages[2].payload, "three")
def run(self):
    """Producer loop: drain ``self.in_queue`` and send batched messages.

    Starts once ``self.barrier.wait()`` returns and keeps running while
    ``self.barrier.is_set()`` is true. Buffered messages are flushed when
    their count exceeds ``self.producer_flush_buffer`` or when
    ``self.producer_flush_timeout`` seconds have elapsed since the previous
    flush. When the barrier is cleared, whatever is still buffered is sent
    and ``self.client`` is closed.
    """
    self.barrier.wait()
    log.info("Starting %s" % self)
    messages = []
    last_produce = time.time()

    def flush(messages):
        # An empty batch carries no data; do not issue a request for it.
        if messages:
            # NOTE(review): -1 is the partition argument used by the
            # original code; its semantics are not visible here -- confirm.
            self.client.send_message_set(
                ProduceRequest(self.topic, -1, messages))
            # Empty the shared list in place rather than rebinding it.
            del messages[:]

    while True:
        if not self.barrier.is_set():
            log.info("Shutdown %s, flushing messages" % self)
            try:
                flush(messages)
            finally:
                # Always close the client, even when the last send raises.
                self.client.close()
            break

        if len(messages) > self.producer_flush_buffer:
            log.debug("Message count threshold reached. Flushing messages")
            flush(messages)
            last_produce = time.time()
        elif (time.time() - last_produce) > self.producer_flush_timeout:
            log.debug("Producer timeout reached. Flushing messages")
            flush(messages)
            last_produce = time.time()

        try:
            # Bounded wait: an empty queue must not stall the flush and
            # shutdown checks at the top of the loop.
            msg = KafkaClient.create_message(
                self.in_queue.get(True, self.producer_timeout))
            messages.append(msg)
        except Empty:
            continue
def test_message_simple(self):
    # Encode a single message, compare against the known wire bytes, then
    # decode those bytes and check the original message comes back.
    msg = KafkaClient.create_message("testing")
    enc = KafkaClient.encode_message(msg)
    expect = "\x00\x00\x00\r\x01\x00\xe8\xf3Z\x06testing"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(enc, expect)

    # The second element returned by read_message_set() is not checked here.
    (messages, _read) = KafkaClient.read_message_set(enc)
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0], msg)
def test_message_simple(self):
    # Single-message round trip: encode_message() must yield the fixed byte
    # string below, and read_message_set() must decode it back to a
    # one-element list equal to the original message.
    original = KafkaClient.create_message("testing")
    encoded = KafkaClient.encode_message(original)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(encoded, "\x00\x00\x00\r\x01\x00\xe8\xf3Z\x06testing")

    decoded = KafkaClient.read_message_set(encoded)[0]
    self.assertEqual(len(decoded), 1)
    self.assertEqual(decoded[0], original)
def test_message_simple_random(self):
    # Randomized round trip: ITERATIONS times, build 0-10 messages with
    # random payloads, encode them as a set, decode, and compare one by one.
    for _iteration in xrange(ITERATIONS):
        n = random.randint(0, 10)
        msgs = [KafkaClient.create_message(random_string()) for _k in range(n)]
        enc = KafkaClient.encode_message_set(msgs)
        (messages, _read) = KafkaClient.read_message_set(enc)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(messages), n)
        # Lengths are known to match at this point, so zip() covers every
        # pair.
        for decoded, original in zip(messages, msgs):
            self.assertEqual(decoded, original)
def test_message_simple_random(self):
    # Fuzz-style check that encode_message_set()/read_message_set() are
    # inverses for message sets of random size (0 to 10) and random payloads.
    for _round in xrange(ITERATIONS):
        n = random.randint(0, 10)
        msgs = [
            KafkaClient.create_message(random_string())
            for _slot in range(n)
        ]
        enc = KafkaClient.encode_message_set(msgs)
        messages = KafkaClient.read_message_set(enc)[0]
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(messages), n)
        for position, expected in enumerate(msgs):
            self.assertEqual(messages[position], expected)
def test_offset_request(self):
    # Produce a message to create the topic/partition
    message1 = KafkaClient.create_message("testing 1")
    req = ProduceRequest("test-offset-request", 0, [message1])
    self.kafka.send_message_set(req)
    self.assertTrue(self.server.wait_for("Created log for 'test-offset-request'-0"))
    self.assertTrue(self.server.wait_for("Flushing log 'test-offset-request-0'"))

    t1 = int(time.time() * 1000)  # now
    t2 = t1 + 60000  # one minute from now

    # Smoke test: both offset requests must complete without raising. The
    # results are not printed, so the test produces no debug output and
    # avoids Python 2-only print statements.
    # NOTE(review): the returned offsets are not asserted on -- consider
    # adding checks once the expected values are pinned down.
    req = OffsetRequest("test-offset-request", 0, t1, 1024)
    self.kafka.get_offsets(req)

    req = OffsetRequest("test-offset-request", 0, t2, 1024)
    self.kafka.get_offsets(req)
def test_offset_request(self):
    # Seeds the "test-offset-request" topic with one message, then issues
    # two offset requests (timestamp "now" and one minute ahead, both in
    # milliseconds). The returned offsets are not inspected; the calls only
    # have to succeed.
    topic = "test-offset-request"

    seed = KafkaClient.create_message("testing 1")
    self.kafka.send_message_set(ProduceRequest(topic, 0, [seed]))
    for expected_line in ("Created log for 'test-offset-request'-0",
                          "Flushing log 'test-offset-request-0'"):
        self.assertTrue(self.server.wait_for(expected_line))

    now_ms = int(time.time() * 1000)
    one_minute_ahead_ms = now_ms + 60000
    for timestamp in (now_ms, one_minute_ahead_ms):
        self.kafka.get_offsets(OffsetRequest(topic, 0, timestamp, 1024))
def test_iterator(self):
    # Produce 100 messages
    messages = [KafkaClient.create_message("testing %d" % i) for i in range(100)]
    self.kafka.send_message_set(ProduceRequest("test-iterator", 0, messages))
    self.assertTrue(self.server.wait_for("Created log for 'test-iterator'-0"))
    self.assertTrue(self.server.wait_for("Flushing log 'test-iterator-0'"))

    # Initialize an iterator of fetch size 64 bytes - big enough for one
    # message but not enough for all 100 messages
    paginated = self.kafka.iter_messages("test-iterator", 0, 0, 64)
    cnt = 0
    for i, msg in enumerate(paginated):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(messages[i], msg)
        cnt += 1
    self.assertEqual(cnt, 100)

    # Same thing, but don't auto paginate: fewer than 100 messages should
    # be yielded.
    single_fetch = self.kafka.iter_messages("test-iterator", 0, 0, 64, False)
    cnt = 0
    for i, msg in enumerate(single_fetch):
        self.assertEqual(messages[i], msg)
        cnt += 1
    self.assertTrue(cnt < 100)
def test_produce_request(self):
    # Encoding a produce request for topic "my-topic", partition 0, with a
    # single "testing" message must yield exactly the bytes below.
    req = ProduceRequest("my-topic", 0, [KafkaClient.create_message("testing")])
    enc = KafkaClient.encode_produce_request(req)
    # Split for readability; adjacent literals concatenate to the same
    # bytes as the original single literal.
    # NOTE(review): the field labels are inferred from the byte layout --
    # confirm against the protocol definition.
    expect = (
        "\x00\x00\x00\x08my-topic"  # header bytes + topic name
        "\x00\x00\x00\x00"          # partition 0
        "\x00\x00\x00\x11"          # 0x11 = 17 bytes of message set follow
        "\x00\x00\x00\r\x01\x00\xe8\xf3Z\x06testing"  # the encoded message
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(enc, expect)
def test_create(self):
    # create_message() must keep the payload and fill in the expected
    # magic, attributes and crc fields.
    msg = KafkaClient.create_message("testing")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(msg.payload, "testing")
    self.assertEqual(msg.magic, 1)
    self.assertEqual(msg.attributes, 0)
    # -386704890 is 0xE8F35A06 read as a signed 32-bit integer.
    self.assertEqual(msg.crc, -386704890)
def test_produce_request(self):
    # Pins the wire encoding of a produce request carrying one "testing"
    # message for partition 0 of "my-topic".
    message = KafkaClient.create_message("testing")
    request = ProduceRequest("my-topic", 0, [message])
    expect = "\x00\x00\x00\x08my-topic\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\r\x01\x00\xe8\xf3Z\x06testing"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(KafkaClient.encode_produce_request(request), expect)
def test_create(self):
    # Verifies every field of a freshly created message, in the same order
    # as the original assertions: payload, magic, attributes, crc.
    msg = KafkaClient.create_message("testing")
    expected_fields = (
        ("payload", "testing"),
        ("magic", 1),
        ("attributes", 0),
        ("crc", -386704890),  # 0xE8F35A06 as a signed 32-bit integer
    )
    for field_name, expected in expected_fields:
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(getattr(msg, field_name), expected)
class KfkClient(object):
    """Small wrapper around ``KafkaClient`` that remembers its read offset.

    The offset for the current (topic, partition) pair is stored in a file
    named ``<topic>-<partition>.offset`` (relative path), which is re-read
    whenever the pair changes.
    """

    def __init__(self, ip, port=9092):
        """Connect to the broker at ``ip``; ``port`` defaults to 9092."""
        self.client = KafkaClient(ip, port)
        self.fd = None          # open handle on the current offset file
        self.topic = None
        self.partition = None
        self.offset = None      # next offset to fetch from

    def send(self, topic, partition, data):
        """Wrap ``data`` in one message and produce it to topic/partition."""
        message = self.client.create_message(data)
        request = ProduceRequest(topic, partition, [message])
        self.client.send_message_set(request)

    def _check_offset(self, topic, partition):
        """Reload the offset when the topic or partition has changed."""
        if self.topic != topic or self.partition != partition:
            self.topic = topic
            self.partition = partition
            self._get_new_offset()

    def receive(self, topic, partition):
        """Block until at least one message is available and return the batch.

        Polls once per second while the partition is empty. After a
        non-empty fetch the new offset is written to the offset file.
        """
        self._check_offset(topic, partition)
        while True:
            request = FetchRequest(topic, partition, self.offset, 2048)
            debug(request)
            try:
                (messages, next_request) = self.client.get_message_set(request)
            except Exception:
                # The original `except e:` named an undefined variable, so
                # any fetch error surfaced as a NameError. Its recovery call
                # to _check_offset() was also a no-op, because the topic and
                # partition are unchanged here. Re-validate the offset
                # against the broker's current range instead, and back off
                # before retrying.
                self._get_new_offset()
                time.sleep(1)
                continue

            if messages:
                self.offset = next_request.offset
                self._write_offset()
                return messages
            time.sleep(1)

    def get_line(self, topic, partition):
        """Yield message payloads from topic/partition indefinitely."""
        while True:
            for message in self.receive(topic, partition):
                yield message.payload

    def close(self):
        """Close the offset file (if open) and the underlying client."""
        try:
            if self.fd is not None:
                self.fd.close()
                self.fd = None
        finally:
            # Close the client even if closing the offset file failed.
            self.client.close()

    def _get_new_offset(self):
        """Choose the starting offset for the current topic/partition.

        Reads the stored offset from the offset file, asks the client for
        two offsets (request times -2 and -1, used below as the minimum and
        maximum), then picks:

        * the minimum when no offset file existed,
        * the stored offset when it lies within [min, max],
        * the maximum otherwise.

        Unparseable file content is treated as offset 0. The chosen offset
        is written back to the file.
        """
        file_name = "%s-%s.offset" % (self.topic, self.partition)
        if self.fd is not None:
            self.fd.close()
            self.fd = None
        try:
            self.fd = open(file_name, 'r+')
            file_offset = self.fd.readline()
        except IOError:
            # No readable offset file: create it and flag "no stored offset".
            self.fd = open(file_name, 'w+')
            file_offset = -1
        # The file is deliberately not truncated here. _write_offset()
        # replaces its content once the new offset is known, so a failure in
        # the offset requests below does not wipe the stored value.

        try:
            file_offset = int(file_offset)
        except (TypeError, ValueError):
            file_offset = 0

        minoffsetreq = OffsetRequest(self.topic, self.partition, -2, 1)
        minoffset = self.client.get_offsets(minoffsetreq)[0]
        maxoffsetreq = OffsetRequest(self.topic, self.partition, -1, 1)
        maxoffset = self.client.get_offsets(maxoffsetreq)[0]

        if file_offset == -1:
            self.offset = minoffset
        elif minoffset <= file_offset <= maxoffset:
            self.offset = file_offset
        else:
            self.offset = maxoffset
        debug("file%d min%d max%d using%d" % (file_offset, minoffset, maxoffset, self.offset))
        self._write_offset()

    def _write_offset(self):
        """Overwrite the offset file with the current offset."""
        self.fd.seek(0, 0)
        self.fd.write("%d" % self.offset)
        # Drop leftover digits from a longer previous value, and flush so
        # the offset is handed to the OS before the next fetch.
        self.fd.truncate()
        self.fd.flush()