def test_hashed_partitioner(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
        resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))

        offsets = {0: start_offset0, 1: start_offset1}
        messages = {0: [], 1: []}

        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
        resps = [resp1, resp2, resp3, resp4, resp5]
        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

        for key, resp, msg in zip(keys, resps, msgs):
            k = hash(key) % 2
            offset = offsets[k]
            self.assert_produce_response(resp, offset)
            offsets[k] += 1
            messages[k].append(msg)

        self.assert_fetch_offset(0, start_offset0, messages[0])
        self.assert_fetch_offset(1, start_offset1, messages[1])

        producer.stop()
    def test_hashed_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
        resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))

        offsets = {partitions[0]: start_offsets[0], partitions[1]: start_offsets[1]}
        messages = {partitions[0]: [], partitions[1]: []}

        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
        resps = [resp1, resp2, resp3, resp4, resp5]
        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

        for key, resp, msg in zip(keys, resps, msgs):
            k = hash(key) % 2
            partition = partitions[k]
            offset = offsets[partition]
            self.assert_produce_response(resp, offset)
            offsets[partition] += 1
            messages[partition].append(msg)

        self.assert_fetch_offset(partitions[0], start_offsets[0], messages[partitions[0]])
        self.assert_fetch_offset(partitions[1], start_offsets[1], messages[partitions[1]])

        producer.stop()
Esempio n. 3
0
    def test_hashed_partitioner(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
        resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))

        offsets = {0: start_offset0, 1: start_offset1}
        messages = {0: [], 1: []}

        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
        resps = [resp1, resp2, resp3, resp4, resp5]
        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

        for key, resp, msg in zip(keys, resps, msgs):
            k = hash(key) % 2
            offset = offsets[k]
            self.assert_produce_response(resp, offset)
            offsets[k] += 1
            messages[k].append(msg)

        self.assert_fetch_offset(0, start_offset0, messages[0])
        self.assert_fetch_offset(1, start_offset1, messages[1])

        producer.stop()
Esempio n. 4
0
    def test_round_robin_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner)
        resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))

        self.assert_produce_response(resp1, start_offsets[0] + 0)
        self.assert_produce_response(resp2, start_offsets[1] + 0)
        self.assert_produce_response(resp3, start_offsets[0] + 1)
        self.assert_produce_response(resp4, start_offsets[1] + 1)

        self.assert_fetch_offset(
            partitions[0], start_offsets[0],
            [self.msg("one"), self.msg("three")])
        self.assert_fetch_offset(
            partitions[1], start_offsets[1],
            [self.msg("two"), self.msg("four")])

        producer.stop()
Esempio n. 5
0
    def test_hashed_partitioner(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send(self.topic, 1, self.msg("one"))
        resp2 = producer.send(self.topic, 2, self.msg("two"))
        resp3 = producer.send(self.topic, 3, self.msg("three"))
        resp4 = producer.send(self.topic, 3, self.msg("four"))
        resp5 = producer.send(self.topic, 4, self.msg("five"))

        self.assert_produce_response(resp1, start_offset1 + 0)
        self.assert_produce_response(resp2, start_offset0 + 0)
        self.assert_produce_response(resp3, start_offset1 + 1)
        self.assert_produce_response(resp4, start_offset1 + 2)
        self.assert_produce_response(resp5, start_offset0 + 1)

        self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("two"), self.msg("five")])
        self.assert_fetch_offset(
            1, start_offset1,
            [self.msg("one"),
             self.msg("three"),
             self.msg("four")])

        producer.stop()
Esempio n. 6
0
class KafkaLoggingHandler(logging.Handler):

    def __init__(self, host, port, topic, key=None):
        logging.Handler.__init__(self)
        self.kafka_client = KafkaClient(host, port)
        self.key = key
        if key is None:
            self.producer = SimpleProducer(self.kafka_client, topic)
        else:
            self.producer = KeyedProducer(self.kafka_client, topic)

    def emit(self, record):
        #drop kafka logging to avoid infinite recursion
        if record.name == 'kafka':
            return
        try:
            #use default formatting
            msg = self.format(record)
            #produce message
            if self.key is None:
                self.producer.send_messages(msg)
            else:
                self.producer.send(self.key, msg)
        except:
            import traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
            del ei

    def close(self):
        self.producer.stop()
        logging.Handler.close(self)
Esempio n. 7
0
class KafkaLoggingHandler(logging.Handler):
    """
    形成 kafka 日志handle
    """
    def __init__(self, host, topic, **kwargs):
        logging.Handler.__init__(self)
        self.key = kwargs.get("key", None)
        self.kafka_topic_name = topic

        if not self.key:
            self.producer = KafkaProducer(bootstrap_servers=host,
                                          api_version=(0, 10, 1),
                                          **kwargs)
        else:
            self.producer = KeyedProducer(bootstrap_servers=host,
                                          api_version=(0, 10, 1),
                                          **kwargs)

    def emit(self, record):
        # 忽略kafka的日志,以免导致无限递归。
        if 'kafka' in record.name:
            return
        try:
            # 格式化日志并指定编码为utf-8
            print(f'record : {record}')
            message = {
                'eventId':
                str(event_id),
                "eventChannel":
                record.name,
                'hostName':
                hostName,
                'address':
                host_ip,
                'eventTime':
                time.strftime("%Y-%m-%d %H:%M:%S",
                              time.localtime(record.created)),
                'level':
                record.levelname,
                'message':
                record.msg,
                'throwableInfo':
                record.exc_text
            }
            mess = json.dumps(message)
            mess = bytes(mess, encoding='utf8')
            # msg = self.format(record)
            # if isinstance(msg, unicode):
            #     msg = msg.encode("utf-8")
            # # kafka生产者,发送消息到broker。
            if not self.key:
                self.producer.send(self.kafka_topic_name, None, mess)
            else:
                self.producer.send(self.kafka_topic_name, self.key, mess)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)
Esempio n. 8
0
def genData(topic):
    producer = KeyedProducer(kafka)
    while True:
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip()) 
	        time.sleep(0.1)  # Creating some delay to allow proper rendering of the cab locations on the map
        
        source_file.close()
Esempio n. 9
0
def genData(topic):
    producer = KeyedProducer(kafka)
    while True:

        for line in fileinput.input(source_file):
            key = line.split("\t")[0]
            print key
            print line.rstrip()
            producer.send(topic, key, line.rstrip())
            time.sleep(0.1)  # Creating some delay to allow
        fileinput.close()
Esempio n. 10
0
def genData(topic):
    producer = KeyedProducer(kafka)
    while True:
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                time.sleep(
                    0.1
                )  # Creating some delay to allow proper rendering of the cab locations on the map

        source_file.close()
Esempio n. 11
0
class NautilusDive(object):
    def __init__(self, config):
        self.brokers = config['brokers']
        self.topic = config['topic']
        self.kafka = KafkaClient(self.brokers)
        if config['partitioner'] is None:
            self.producer = KeyedProducer(self.kafka, partitioner=RoundRobinPartitioner)
        else:
            self.producer = KeyedProducer(self.kafka, partitioner=config['partitioner'])

    def send(self, key, message):
        self.producer.send(self.topic, key, message)
    def test_round_robin_partitioner(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
        resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))

        self.assert_produce_response(resp1, start_offset0+0)
        self.assert_produce_response(resp2, start_offset1+0)
        self.assert_produce_response(resp3, start_offset0+1)
        self.assert_produce_response(resp4, start_offset1+1)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("three") ])
        self.assert_fetch_offset(1, start_offset1, [ self.msg("two"), self.msg("four")  ])

        producer.stop()
    def test_round_robin_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
        resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))

        self.assert_produce_response(resp1, start_offsets[0]+0)
        self.assert_produce_response(resp2, start_offsets[1]+0)
        self.assert_produce_response(resp3, start_offsets[0]+1)
        self.assert_produce_response(resp4, start_offsets[1]+1)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("three") ])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [ self.msg("two"), self.msg("four")  ])

        producer.stop()
    def test_async_keyed_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)

        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])

        producer.stop()
Esempio n. 15
0
class KafkaBolt(Bolt):

    def initialize(self, stormconf, ctx):
        self.kafka_client = KafkaClient(config['kafka']['hosts'])
        self.keyed_producer = KeyedProducer(self.kafka_client)
        self.simple_producer = SimpleProducer(self.kafka_client)

    def process(self, tup):
        report_id, record_type, report_data = tup.values
        self.log('Processing: %s' % report_id)
        json_data = str(report_data)
        report_id = str(report_id)
        topic = str("sanitised")
        if record_type == "entry":
            payload = str("e" + json_data)
        elif record_type == "header":
            payload = str("h" + json_data)
        elif record_type == "footer":
            payload = str("f" + json_data)
        self.keyed_producer.send(topic, report_id, payload)
    def test_hashed_partitioner(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send(self.topic, 1, self.msg("one"))
        resp2 = producer.send(self.topic, 2, self.msg("two"))
        resp3 = producer.send(self.topic, 3, self.msg("three"))
        resp4 = producer.send(self.topic, 3, self.msg("four"))
        resp5 = producer.send(self.topic, 4, self.msg("five"))

        self.assert_produce_response(resp1, start_offset1+0)
        self.assert_produce_response(resp2, start_offset0+0)
        self.assert_produce_response(resp3, start_offset1+1)
        self.assert_produce_response(resp4, start_offset1+2)
        self.assert_produce_response(resp5, start_offset0+1)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("two"), self.msg("five") ])
        self.assert_fetch_offset(1, start_offset1, [ self.msg("one"), self.msg("three"), self.msg("four") ])

        producer.stop()
Esempio n. 17
0
    def test_async_keyed_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner,
                                 async=True)

        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        producer.stop()
Esempio n. 18
0
    def test_hashed_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
        resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))

        offsets = {
            partitions[0]: start_offsets[0],
            partitions[1]: start_offsets[1]
        }
        messages = {partitions[0]: [], partitions[1]: []}

        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
        resps = [resp1, resp2, resp3, resp4, resp5]
        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

        for key, resp, msg in zip(keys, resps, msgs):
            k = hash(key) % 2
            partition = partitions[k]
            offset = offsets[partition]
            self.assert_produce_response(resp, offset)
            offsets[partition] += 1
            messages[partition].append(msg)

        self.assert_fetch_offset(partitions[0], start_offsets[0],
                                 messages[partitions[0]])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 messages[partitions[1]])

        producer.stop()
Esempio n. 19
0
    def test_round_robin_partitioner(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner)
        resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
        resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
        resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
        resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))

        self.assert_produce_response(resp1, start_offset0 + 0)
        self.assert_produce_response(resp2, start_offset1 + 0)
        self.assert_produce_response(resp3, start_offset0 + 1)
        self.assert_produce_response(resp4, start_offset1 + 1)

        self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("three")])
        self.assert_fetch_offset(
            1, start_offset1,
            [self.msg("two"), self.msg("four")])

        producer.stop()
    def test_async_keyed_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)

        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
          time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])

        producer.stop()
Esempio n. 21
0
    def test_async_keyed_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner,
                                 async=True)

        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
            time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()
Esempio n. 22
0
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

import time
import datetime
from kafka import KafkaClient, KeyedProducer,
from kafka import HashedPartitioner, RoundRobinPartitioner

kafka = KafkaClient("localhost:6667")

#Default partitioner is HashedPartitioner
producer = KeyedProducer(kafka)
producer.send("test", "key1", "Test message with key1")
producer.send("test", "key2", "Test message with key2")

#Using RoundRobinPartitioner
producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)
producer.send("test", "key3", "Test message with key3")
producer.send("test", "key4", "Test message with key4")