def test_hashed_partitioner(self):
    """Check HashedPartitioner placement against a locally computed ``hash(key) % 2``.

    Five keyed messages are produced (two share a key).  For each one the
    expected partition (0 or 1) is recomputed from the key, the produce
    response is checked against the next expected offset on that partition,
    and finally both partitions are fetched and compared.
    """
    initial = {
        0: self.current_offset(self.topic, 0),
        1: self.current_offset(self.topic, 1),
    }

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

    payloads = [
        ("1", "one"),
        ("2", "two"),
        ("3", "three"),
        ("3", "four"),
        ("4", "five"),
    ]
    responses = [
        producer.send(self.topic, self.key(raw_key), self.msg(text))
        for raw_key, text in payloads
    ]

    next_offset = dict(initial)
    expected = {0: [], 1: []}
    sent_keys = [self.key(raw_key) for raw_key, _ in payloads]
    sent_bodies = [self.msg(text) for _, text in payloads]

    for sent_key, response, body in zip(sent_keys, responses, sent_bodies):
        bucket = hash(sent_key) % 2
        self.assert_produce_response(response, next_offset[bucket])
        next_offset[bucket] += 1
        expected[bucket].append(body)

    for partition in (0, 1):
        self.assert_fetch_offset(partition, initial[partition], expected[partition])

    producer.stop()
def test_hashed_partitioner(self):
    """Check HashedPartitioner placement using the topic's real partition ids.

    The partition expected for each key is ``partitions[hash(key) % 2]``;
    produce responses and fetched messages are verified per partition.
    """
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = []
    for partition_id in partitions:
        start_offsets.append(self.current_offset(self.topic, partition_id))

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

    payloads = [
        ("1", "one"),
        ("2", "two"),
        ("3", "three"),
        ("3", "four"),
        ("4", "five"),
    ]
    responses = [
        producer.send(self.topic, self.key(raw_key), self.msg(text))
        for raw_key, text in payloads
    ]

    # Only the first two partitions take part in the expectation.
    next_offset = {
        partitions[0]: start_offsets[0],
        partitions[1]: start_offsets[1],
    }
    expected = {partitions[0]: [], partitions[1]: []}
    sent_keys = [self.key(raw_key) for raw_key, _ in payloads]
    sent_bodies = [self.msg(text) for _, text in payloads]

    for sent_key, response, body in zip(sent_keys, responses, sent_bodies):
        target = partitions[hash(sent_key) % 2]
        self.assert_produce_response(response, next_offset[target])
        next_offset[target] += 1
        expected[target].append(body)

    for index in (0, 1):
        self.assert_fetch_offset(
            partitions[index], start_offsets[index], expected[partitions[index]]
        )

    producer.stop()
def test_round_robin_partitioner(self):
    """Verify that RoundRobinPartitioner alternates over the first two partitions.

    Four keyed messages are produced.  The first and third are expected on
    ``partitions[0]`` and the second and fourth on ``partitions[1]``, each at
    consecutive offsets counted from that partition's starting offset.
    """
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = []
    for partition_id in partitions:
        start_offsets.append(self.current_offset(self.topic, partition_id))

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)

    words = ["one", "two", "three", "four"]
    responses = [
        producer.send(self.topic, self.key("key%d" % number), self.msg(word))
        for number, word in enumerate(words, 1)
    ]

    # Send i (0-based) goes to slot i % 2 and is that slot's (i // 2)-th message.
    for position, response in enumerate(responses):
        rounds, slot = divmod(position, 2)
        self.assert_produce_response(response, start_offsets[slot] + rounds)

    for slot in (0, 1):
        self.assert_fetch_offset(
            partitions[slot],
            start_offsets[slot],
            [self.msg(word) for word in words[slot::2]],
        )

    producer.stop()
def test_hashed_partitioner(self):
    """Check HashedPartitioner with integer keys against fixed expectations.

    Keys 1 and 3 are expected on partition 1 and keys 2 and 4 on partition 0;
    each produce response must carry the next offset of its partition.
    """
    start = {
        0: self.current_offset(self.topic, 0),
        1: self.current_offset(self.topic, 1),
    }

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

    # (key, message text, expected partition, expected offset delta)
    plan = [
        (1, "one", 1, 0),
        (2, "two", 0, 0),
        (3, "three", 1, 1),
        (3, "four", 1, 2),
        (4, "five", 0, 1),
    ]
    responses = [
        producer.send(self.topic, key, self.msg(text)) for key, text, _, _ in plan
    ]

    for response, (_, _, partition, delta) in zip(responses, plan):
        self.assert_produce_response(response, start[partition] + delta)

    self.assert_fetch_offset(
        0, start[0], [self.msg(text) for text in ("two", "five")]
    )
    self.assert_fetch_offset(
        1, start[1], [self.msg(text) for text in ("one", "three", "four")]
    )

    producer.stop()
class KafkaLoggingHandler(logging.Handler):
    """Logging handler that publishes formatted log records through a Kafka producer.

    With no ``key`` the handler uses a ``SimpleProducer`` and calls
    ``send_messages``; with a ``key`` it uses a ``KeyedProducer`` and calls
    ``send(key, msg)``.
    """

    def __init__(self, host, port, topic, key=None):
        """Create the Kafka client and the producer bound to ``topic``.

        Args:
            host: Passed to ``KafkaClient`` as its first argument.
            port: Passed to ``KafkaClient`` as its second argument.
            topic: Passed to the producer constructor.
            key: Optional message key; selects the keyed producer when given.
        """
        logging.Handler.__init__(self)
        self.kafka_client = KafkaClient(host, port)
        self.key = key
        if key is None:
            self.producer = SimpleProducer(self.kafka_client, topic)
        else:
            self.producer = KeyedProducer(self.kafka_client, topic)

    def emit(self, record):
        """Format ``record`` and send it; failures are reported on stderr."""
        # Drop kafka logging to avoid infinite recursion.  Child loggers such
        # as "kafka.conn" are dropped too, not only the exact name "kafka".
        if record.name == 'kafka' or record.name.startswith('kafka.'):
            return
        try:
            # use default formatting
            msg = self.format(record)
            # produce message
            if self.key is None:
                self.producer.send_messages(msg)
            else:
                self.producer.send(self.key, msg)
        except (KeyboardInterrupt, SystemExit):
            # A logging handler must never swallow interpreter shutdown.
            raise
        except Exception:
            import traceback
            traceback.print_exc(file=sys.stderr)

    def close(self):
        """Stop the producer, then run the base handler's close."""
        self.producer.stop()
        logging.Handler.close(self)
class KafkaLoggingHandler(logging.Handler):
    """Logging handler that sends each record to a Kafka topic as a JSON document.

    The JSON payload carries the event id, logger name, host name, host
    address, event time, level, message and ``exc_text`` of the record.
    ``event_id``, ``hostName`` and ``host_ip`` are read from module scope.
    """

    def __init__(self, host, topic, **kwargs):
        """Build the producer.

        Args:
            host: Value for the producer's ``bootstrap_servers`` option.
            topic: Topic every record is sent to.
            **kwargs: ``key`` (optional message key) is consumed here; every
                other entry is forwarded to ``KafkaProducer``.
        """
        logging.Handler.__init__(self)
        # "key" is this handler's own option, so remove it before the rest is
        # forwarded as producer configuration.
        self.key = kwargs.pop("key", None)
        self.kafka_topic_name = topic
        # One producer serves both the keyed and un-keyed case; the key is
        # supplied per message in emit().
        # NOTE(review): the key must be bytes unless a key_serializer is
        # passed in kwargs — confirm against callers.
        self.producer = KafkaProducer(bootstrap_servers=host,
                                      api_version=(0, 10, 1), **kwargs)

    def emit(self, record):
        """Serialize ``record`` to UTF-8 JSON and send it to the topic."""
        # Ignore kafka's own log records to avoid infinite recursion.
        if 'kafka' in record.name:
            return
        try:
            # Interpolate %-style arguments when present; otherwise keep the
            # raw message object so non-string messages serialize as before.
            text = record.getMessage() if record.args else record.msg
            message = {
                'eventId': str(event_id),
                "eventChannel": record.name,
                'hostName': hostName,
                'address': host_ip,
                'eventTime': time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime(record.created)),
                'level': record.levelname,
                'message': text,
                'throwableInfo': record.exc_text
            }
            mess = json.dumps(message).encode('utf8')
            # Pass value/key by keyword: positionally the second argument of
            # KafkaProducer.send is the value, not the key.
            self.producer.send(self.kafka_topic_name, value=mess,
                               key=self.key if self.key else None)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

    def close(self):
        """Close the producer, then run the base handler's close."""
        try:
            self.producer.close()
        finally:
            logging.Handler.close(self)
def genData(topic):
    """Replay ``source_file`` into Kafka indefinitely, one keyed message per line.

    The key of each message is the text before the first space on the line;
    the message is the line with trailing whitespace removed.  This function
    never returns.

    Args:
        topic: Topic the messages are sent to.
    """
    producer = KeyedProducer(kafka)
    while True:
        # The with-block closes the file; source_file itself is only the path
        # handed to open(), so there is nothing else to close.
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                # Creating some delay to allow proper rendering of the cab
                # locations on the map
                time.sleep(0.1)
def genData(topic):
    """Replay ``source_file`` into Kafka indefinitely, one keyed message per line.

    The key of each message is the text before the first tab on the line; the
    message is the line with trailing whitespace removed.  Key and message
    are also printed.  This function never returns.

    Args:
        topic: Topic the messages are sent to.
    """
    producer = KeyedProducer(kafka)
    while True:
        try:
            for line in fileinput.input(source_file):
                key = line.split("\t")[0]
                payload = line.rstrip()
                # print() with a single argument behaves the same on
                # Python 2 and Python 3.
                print(key)
                print(payload)
                producer.send(topic, key, payload)
                # Creating some delay between messages
                time.sleep(0.1)
        finally:
            # Release fileinput's module-level state so the next pass (or a
            # caller, after an error) can call fileinput.input() again.
            fileinput.close()
def genData(topic):
    """Stream the lines of ``source_file`` to Kafka in an endless loop.

    Each line is sent with the text before its first space as the key and the
    right-stripped line as the message.  This function never returns.

    Args:
        topic: Topic the messages are sent to.
    """
    producer = KeyedProducer(kafka)
    while True:
        # open() receives source_file as a path, and the with-block closes the
        # resulting file object, so no explicit close is needed.
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                # Creating some delay to allow proper rendering of the cab
                # locations on the map
                time.sleep(0.1)
class NautilusDive(object):
    """Small wrapper that sends keyed messages to one configured Kafka topic."""

    def __init__(self, config):
        """Read ``brokers``, ``topic`` and ``partitioner`` from ``config``.

        A ``partitioner`` of ``None`` selects ``RoundRobinPartitioner``.
        """
        self.brokers = config['brokers']
        self.topic = config['topic']
        self.kafka = KafkaClient(self.brokers)

        chosen = config['partitioner']
        if chosen is None:
            chosen = RoundRobinPartitioner
        self.producer = KeyedProducer(self.kafka, partitioner=chosen)

    def send(self, key, message):
        """Send ``message`` with ``key`` to the configured topic."""
        self.producer.send(self.topic, key, message)
def test_round_robin_partitioner(self):
    """Verify that RoundRobinPartitioner alternates between partitions 0 and 1.

    Four keyed messages are produced; the first and third are expected on
    partition 0, the second and fourth on partition 1, at consecutive offsets.
    """
    start = [
        self.current_offset(self.topic, 0),
        self.current_offset(self.topic, 1),
    ]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)

    words = ["one", "two", "three", "four"]
    responses = [
        producer.send(self.topic, self.key("key%d" % number), self.msg(word))
        for number, word in enumerate(words, 1)
    ]

    # Send i (0-based) lands on partition i % 2 as its (i // 2)-th message.
    for position, response in enumerate(responses):
        rounds, partition = divmod(position, 2)
        self.assert_produce_response(response, start[partition] + rounds)

    for partition in (0, 1):
        self.assert_fetch_offset(
            partition,
            start[partition],
            [self.msg(word) for word in words[partition::2]],
        )

    producer.stop()
def test_round_robin_partitioner(self):
    """Check round-robin placement across the topic's first two partition ids.

    Messages "one" and "three" are expected on ``partitions[0]`` and "two"
    and "four" on ``partitions[1]``, each at consecutive offsets.
    """
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = []
    for partition_id in partitions:
        start_offsets.append(self.current_offset(self.topic, partition_id))

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)

    sends = [("key1", "one"), ("key2", "two"), ("key3", "three"), ("key4", "four")]
    responses = [
        producer.send(self.topic, self.key(raw_key), self.msg(text))
        for raw_key, text in sends
    ]

    # (slot into partitions/start_offsets, offset delta) expected per response.
    expectations = [(0, 0), (1, 0), (0, 1), (1, 1)]
    for response, (slot, delta) in zip(responses, expectations):
        self.assert_produce_response(response, start_offsets[slot] + delta)

    per_slot_texts = [("one", "three"), ("two", "four")]
    for slot, texts in enumerate(per_slot_texts):
        self.assert_fetch_offset(
            partitions[slot],
            start_offsets[slot],
            [self.msg(text) for text in texts],
        )

    producer.stop()
def test_async_keyed_producer(self):
    """Send one message through an async KeyedProducer and fetch it from partition 0.

    The send is expected to return an empty response collection.
    """
    start_offset0 = self.current_offset(self.topic, 0)

    # "async" is a reserved word from Python 3.7 on, so writing async=True no
    # longer parses.  Unpacking a dict passes the very same keyword argument.
    # NOTE(review): newer kafka-python releases name this option async_send —
    # confirm against the installed version.
    producer = KeyedProducer(
        self.client, partitioner=RoundRobinPartitioner, **{"async": True}
    )
    try:
        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
    finally:
        # Stop the producer even when an assertion above fails.
        producer.stop()
class KafkaBolt(Bolt):
    """Bolt that forwards report records to the "sanitised" Kafka topic.

    Each payload is the record data prefixed with one character identifying
    the record type: "e" for entry, "h" for header, "f" for footer.
    """

    # Record type -> one-character payload prefix.
    _PREFIXES = {"entry": "e", "header": "h", "footer": "f"}

    def initialize(self, stormconf, ctx):
        """Create the Kafka client and the keyed and simple producers."""
        self.kafka_client = KafkaClient(config['kafka']['hosts'])
        self.keyed_producer = KeyedProducer(self.kafka_client)
        self.simple_producer = SimpleProducer(self.kafka_client)

    def process(self, tup):
        """Send one tuple's report data, keyed by its report id.

        Raises:
            ValueError: If the record type is not entry, header or footer.
        """
        report_id, record_type, report_data = tup.values
        self.log('Processing: %s' % report_id)

        json_data = str(report_data)
        report_id = str(report_id)
        topic = str("sanitised")

        try:
            prefix = self._PREFIXES[record_type]
        except (KeyError, TypeError):
            # Previously an unknown type left ``payload`` unbound and failed
            # with UnboundLocalError at the send call; fail with a clear
            # message instead.
            raise ValueError("Unknown record type: %r" % (record_type,))
        payload = str(prefix + json_data)

        self.keyed_producer.send(topic, report_id, payload)
def test_hashed_partitioner(self):
    """Check HashedPartitioner placement of integer keys on partitions 0 and 1.

    Expected layout: keys 2 and 4 on partition 0, keys 1 and 3 (sent twice)
    on partition 1, each response carrying the partition's next offset.
    """
    base = {}
    for partition in (0, 1):
        base[partition] = self.current_offset(self.topic, partition)

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

    keyed_texts = [(1, "one"), (2, "two"), (3, "three"), (3, "four"), (4, "five")]
    responses = []
    for key, text in keyed_texts:
        responses.append(producer.send(self.topic, key, self.msg(text)))

    # Expected (partition, offset delta) for each response, in send order.
    placements = [(1, 0), (0, 0), (1, 1), (1, 2), (0, 1)]
    for response, (partition, delta) in zip(responses, placements):
        self.assert_produce_response(response, base[partition] + delta)

    fetch_expectations = [
        (0, ("two", "five")),
        (1, ("one", "three", "four")),
    ]
    for partition, texts in fetch_expectations:
        self.assert_fetch_offset(
            partition, base[partition], [self.msg(text) for text in texts]
        )

    producer.stop()
def test_async_keyed_producer(self):
    """Produce one message asynchronously and verify it is fetched from partition 0.

    The async send is expected to return an empty response collection.
    """
    start_offset0 = self.current_offset(self.topic, 0)

    # async=True is a SyntaxError on Python 3.7+, where "async" is a reserved
    # word; dict unpacking passes the same keyword argument on every version.
    # NOTE(review): newer kafka-python releases name this option async_send —
    # confirm against the installed version.
    async_option = {"async": True}
    producer = KeyedProducer(
        self.client, partitioner=RoundRobinPartitioner, **async_option
    )
    try:
        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
    finally:
        # Make sure the producer is stopped when an assertion fails.
        producer.stop()
def test_hashed_partitioner(self):
    """Verify HashedPartitioner routing against ``partitions[hash(key) % 2]``.

    Five keyed messages are produced; responses and fetched messages are
    checked for the first two partition ids of the topic.
    """
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = list(
        self.current_offset(self.topic, partition_id) for partition_id in partitions
    )

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

    raw_keys = ["1", "2", "3", "3", "4"]
    texts = ["one", "two", "three", "four", "five"]
    responses = []
    for raw_key, text in zip(raw_keys, texts):
        responses.append(producer.send(self.topic, self.key(raw_key), self.msg(text)))

    running_offset = {
        partitions[0]: start_offsets[0],
        partitions[1]: start_offsets[1],
    }
    routed = {partitions[0]: [], partitions[1]: []}
    sent_keys = [self.key(raw_key) for raw_key in raw_keys]
    sent_bodies = [self.msg(text) for text in texts]

    for sent_key, response, body in zip(sent_keys, responses, sent_bodies):
        destination = partitions[hash(sent_key) % 2]
        self.assert_produce_response(response, running_offset[destination])
        running_offset[destination] += 1
        routed[destination].append(body)

    for index in range(2):
        partition_id = partitions[index]
        self.assert_fetch_offset(
            partition_id, start_offsets[index], routed[partition_id]
        )

    producer.stop()
def test_round_robin_partitioner(self):
    """Check that four round-robin sends alternate between partitions 0 and 1.

    "one" and "three" are expected on partition 0, "two" and "four" on
    partition 1, at consecutive offsets from each partition's start.
    """
    base = {}
    for partition in (0, 1):
        base[partition] = self.current_offset(self.topic, partition)

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)

    sends = [("key1", "one"), ("key2", "two"), ("key3", "three"), ("key4", "four")]
    responses = []
    for raw_key, text in sends:
        responses.append(producer.send(self.topic, self.key(raw_key), self.msg(text)))

    # Expected (partition, offset delta) for each response, in send order.
    placements = [(0, 0), (1, 0), (0, 1), (1, 1)]
    for response, (partition, delta) in zip(responses, placements):
        self.assert_produce_response(response, base[partition] + delta)

    fetch_expectations = [(0, ("one", "three")), (1, ("two", "four"))]
    for partition, texts in fetch_expectations:
        self.assert_fetch_offset(
            partition, base[partition], [self.msg(text) for text in texts]
        )

    producer.stop()
def test_async_keyed_producer(self):
    """Send one message through an async KeyedProducer and fetch it back.

    The topic's first partition id is used.  Because the send is
    asynchronous, the test polls the partition offset until it moves before
    fetching.
    """
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    # "async" is a reserved word from Python 3.7 on, so writing async=True no
    # longer parses.  Unpacking a dict passes the very same keyword argument.
    # NOTE(review): newer kafka-python releases name this option async_send —
    # confirm against the installed version.
    producer = KeyedProducer(
        self.client, partitioner=RoundRobinPartitioner, **{"async": True}
    )
    try:
        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
            time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])
    finally:
        # Stop the producer even when an assertion above fails.
        producer.stop()
def test_async_keyed_producer(self):
    """Produce one message asynchronously and verify it on the first partition.

    After the send, the test polls the partition offset until it changes,
    then fetches from the starting offset and compares the message.
    """
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    # async=True is a SyntaxError on Python 3.7+, where "async" is a reserved
    # word; dict unpacking passes the same keyword argument on every version.
    # NOTE(review): newer kafka-python releases name this option async_send —
    # confirm against the installed version.
    async_option = {"async": True}
    producer = KeyedProducer(
        self.client, partitioner=RoundRobinPartitioner, **async_option
    )
    try:
        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
            time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])
    finally:
        # Make sure the producer is stopped when an assertion fails.
        producer.stop()
furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ''' import time import datetime from kafka import KafkaClient, KeyedProducer, from kafka import HashedPartitioner, RoundRobinPartitioner kafka = KafkaClient("localhost:6667") #Default partitioner is HashedPartitioner producer = KeyedProducer(kafka) producer.send("test", "key1", "Test message with key1") producer.send("test", "key2", "Test message with key2") #Using RoundRobinPartitioner producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner) producer.send("test", "key3", "Test message with key3") producer.send("test", "key4", "Test message with key4")