Example #1
0
class DockerExecutor(object):
    """Run pickled tasks read from one Kafka topic and publish their results.

    Messages are consumed from the ``warehouse`` topic; each task's result is
    pickled and sent to the ``warehouse_result`` topic keyed by ``task.id``.
    """

    def __init__(self, warehouse, warehouse_result):
        """Remember both topic names and build the client, producer and consumer.

        :param warehouse: topic the consumer is created for.
        :param warehouse_result: topic results are sent to by :meth:`run`.
        """
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result

        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)

        consumer_options = {
            'bootstrap_servers': [Conf.getWareHouseAddr()],
            'group_id': "cnlab",
            'auto_commit_enable': True,
            'auto_commit_interval_ms': 30 * 1000,
            'auto_offset_reset': 'smallest',
        }
        self.consumer = KafkaConsumer(self.warehouse, **consumer_options)

    def run(self):
        """Process fetched messages one by one: unpickle, run, publish result."""
        for count, msg in enumerate(self.consumer.fetch_messages(), 1):
            logger.debug("%d,%s:%s:%s: key=%s " %
                         (count, msg.topic, msg.partition, msg.offset,
                          msg.key))
            # NOTE(review): the payload is unpickled as-is; this is only safe
            # if every writer to the topic is trusted.
            job = cPickle.loads(msg.value)
            outcome = job.run(0)
            self.producer.send_messages(self.warehouse_result, job.id,
                                        cPickle.dumps(outcome))
 def __init__(self, config):
     """Build the Kafka client and keyed producer described by ``config``.

     Reads ``config['brokers']``, ``config['topic']`` and
     ``config['partitioner']``; a ``None`` partitioner selects
     ``RoundRobinPartitioner``.
     """
     self.brokers = config['brokers']
     self.topic = config['topic']
     self.kafka = KafkaClient(self.brokers)

     chosen_partitioner = config['partitioner']
     if chosen_partitioner is None:
         chosen_partitioner = RoundRobinPartitioner
     self.producer = KeyedProducer(self.kafka, partitioner=chosen_partitioner)
Example #3
0
 def __init__(self, host, port, topic, key=None):
     """Initialise the logging handler and pick a producer for ``topic``.

     A ``SimpleProducer`` is created when ``key`` is ``None``; otherwise a
     ``KeyedProducer`` is created. Both receive the client and ``topic``.
     """
     logging.Handler.__init__(self)
     self.kafka_client = KafkaClient(host, port)
     self.key = key

     producer_cls = SimpleProducer if key is None else KeyedProducer
     self.producer = producer_cls(self.kafka_client, topic)
Example #4
0
def message_sender(m):
    """Send (key, value) to a Kafka producer.

    Every element returned by ``m.collect()`` is published to the ``'flask'``
    topic through a client for ``localhost:9092``: element ``[0]`` is
    stringified and encoded to form the key, element ``[1]`` is the message.
    The producer and client are released once all elements were sent (or a
    send failed).
    """
    client = SimpleClient('localhost:9092')
    producer = KeyedProducer(client)
    try:
        for record in m.collect():
            # ``bytes.encode`` only exists on Python 2 (where bytes is str);
            # ``str.encode`` yields the key on both Python 2 and Python 3.
            producer.send_messages('flask', str(record[0]).encode(), record[1])
    finally:
        # A new client is created on every call, so release it here instead
        # of leaking one connection per invocation.
        producer.stop()
        client.close()
Example #5
0
class KafkaLoggingHandler(logging.Handler):
    """Logging handler that publishes each record to a Kafka topic as JSON."""

    def __init__(self, host, topic, **kwargs):
        """Create the handler and its Kafka producer.

        :param host: passed to the producer as ``bootstrap_servers``.
        :param topic: topic every record is sent to.
        :param kwargs: extra producer options. An optional ``key`` entry is
            stored on the handler; when it is truthy a ``KeyedProducer`` is
            created, otherwise a ``KafkaProducer``.
        """
        logging.Handler.__init__(self)
        self.key = kwargs.get("key", None)
        self.kafka_topic_name = topic

        if not self.key:
            # ``key`` is an option of this handler, not a producer setting;
            # KafkaProducer rejects configuration names it does not know, so
            # a falsy ``key=...`` must not be forwarded to it.
            producer_options = {
                name: value for name, value in kwargs.items() if name != "key"
            }
            self.producer = KafkaProducer(bootstrap_servers=host,
                                          api_version=(0, 10, 1),
                                          **producer_options)
        else:
            self.producer = KeyedProducer(bootstrap_servers=host,
                                          api_version=(0, 10, 1),
                                          **kwargs)

    def emit(self, record):
        """Serialise ``record`` to UTF-8 JSON and send it to the topic.

        Records whose logger name contains ``'kafka'`` are dropped. Any error
        other than ``KeyboardInterrupt``/``SystemExit`` is passed to
        ``handleError``.
        """
        # Ignore Kafka's own log records to avoid infinite recursion.
        if 'kafka' in record.name:
            return
        try:
            # Format the log entry and encode it as UTF-8.
            print(f'record : {record}')
            message = {
                'eventId': str(event_id),
                "eventChannel": record.name,
                'hostName': hostName,
                'address': host_ip,
                'eventTime': time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime(record.created)),
                'level': record.levelname,
                'message': record.msg,
                'throwableInfo': record.exc_text,
            }
            mess = bytes(json.dumps(message), encoding='utf8')
            # Hand the message to the producer, which sends it to the broker.
            if not self.key:
                # KafkaProducer.send is (topic, value, key, ...): passing
                # (topic, None, mess) positionally would publish a null value
                # with the payload as its key, so name the value explicitly.
                self.producer.send(self.kafka_topic_name, value=mess)
            else:
                self.producer.send(self.kafka_topic_name, self.key, mess)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)
Example #6
0
def sendMsg(topic, lines,
            brokers='10.117.181.44:9092,10.117.108.143:9092,10.117.21.79:9092'):
    """Publish every entry of ``lines`` to ``topic`` with a randomised key.

    Nothing is done (and no connection is opened) when ``lines`` is empty.
    Each message key is ``topic`` followed by ``"_"`` and a random integer
    from 0 to 10 inclusive.

    :param topic: destination topic, also used as the key prefix.
    :param lines: sized collection of messages to send.
    :param brokers: broker list handed to ``KafkaClient``; defaults to the
        addresses that were previously hard-coded.
    """
    if len(lines) == 0:
        return

    kafka = KafkaClient(brokers)
    producer = KeyedProducer(kafka)
    try:
        for line in lines:
            ran = "_" + str(random.randint(0, 10))
            producer.send_messages(topic, topic + ran, line)
    finally:
        # Stop the producer even when a send fails part-way through.
        producer.stop()
Example #7
0
def sendMsg(topic, lines,
            brokers='cdh-slave0:9092,cdh-slave1:9092,cdh-slave2:9092'):
    """Send each element of ``lines`` to ``topic`` under a randomised key.

    Returns immediately, without creating a client, when ``lines`` is empty.
    The key of every message is ``topic + "_" + str(n)`` where ``n`` is a
    random integer between 0 and 10 inclusive.

    :param topic: destination topic and key prefix.
    :param lines: sized collection of messages.
    :param brokers: broker list for ``KafkaClient``; the default keeps the
        previously hard-coded hosts.
    """
    if len(lines) == 0:
        return

    kafka = KafkaClient(brokers)
    producer = KeyedProducer(kafka)
    try:
        for line in lines:
            suffix = "_" + str(random.randint(0, 10))
            producer.send_messages(topic, topic + suffix, line)
    finally:
        # Make sure the producer is stopped even if a send raises.
        producer.stop()
Example #8
0
def genData(topic):
    """Replay ``source_file`` to ``topic`` in an endless loop.

    Each line is sent with its first space-separated field as the key and the
    right-stripped line as the message, pausing 0.1 s after every send. When
    the end of the file is reached it is reopened and replayed. This function
    never returns.
    """
    producer = KeyedProducer(kafka)
    while True:
        # ``with`` closes the file after each pass. ``source_file`` is the
        # value given to ``open`` rather than a file object, so it must not
        # be ``close()``d here.
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                # Creating some delay to allow proper rendering of the cab
                # locations on the map
                time.sleep(0.1)
    def test_async_keyed_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)

        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])

        producer.stop()
Example #10
0
def genData(topic):
    """Replay ``source_file`` to ``topic`` forever, echoing what is sent.

    For every line the first tab-separated field is used as the key and the
    right-stripped line as the message; both are printed before sending, and
    the loop pauses 0.1 s after each send. After the last line the input is
    closed and read again from the start. This function never returns.
    """
    producer = KeyedProducer(kafka)
    while True:
        for line in fileinput.input(source_file):
            key = line.split("\t")[0]
            message = line.rstrip()
            # Parenthesised single-argument ``print`` behaves the same as the
            # former print statement on Python 2 and also parses on Python 3.
            print(key)
            print(message)
            producer.send(topic, key, message)
            time.sleep(0.1)  # Creating some delay to allow
        fileinput.close()
Example #11
0
def genData(topic):
    """Stream the lines of ``source_file`` to ``topic`` over and over.

    The key of each message is the line's first space-separated field; the
    message is the line without trailing whitespace. A 0.1 s pause follows
    every send. The file is reopened after each full pass, so this function
    never returns.
    """
    producer = KeyedProducer(kafka)
    while True:
        # The context manager closes the file; ``source_file`` itself is what
        # ``open`` was given and has no handle to close.
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                # Creating some delay to allow proper rendering of the cab
                # locations on the map
                time.sleep(0.1)
class NautilusDive(object):
    """Thin wrapper that sends keyed messages to one configured topic."""

    def __init__(self, config):
        """Create the client and producer from ``config``.

        ``config`` is indexed with ``'brokers'``, ``'topic'`` and
        ``'partitioner'``; a ``None`` partitioner falls back to
        ``RoundRobinPartitioner``.
        """
        self.brokers = config['brokers']
        self.topic = config['topic']
        self.kafka = KafkaClient(self.brokers)

        partitioner = config['partitioner']
        if partitioner is None:
            partitioner = RoundRobinPartitioner
        self.producer = KeyedProducer(self.kafka, partitioner=partitioner)

    def send(self, key, message):
        """Send ``message`` under ``key`` to the configured topic."""
        self.producer.send(self.topic, key, message)
 def keyedProduce(self,topic, key, value):
     kafka=KafkaClient(self.configs["broker_list"].split(","))
     keyedProducer=KeyedProducer(kafka,async=True)
     undone=True
     while(undone):
         try:
             keyedProducer.send_messages(topic, key, value)
             undone=False
         except LeaderNotAvailableError:
             sleep(10)
             print("LeaderNotAvailableError")
             pass
Example #14
0
    def __init__(self, warehouse, warehouse_result):
        """Store the two topic names and create client, producer and consumer.

        :param warehouse: topic the consumer is created for.
        :param warehouse_result: stored on the instance for later use.
        """
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result

        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)

        consumer_settings = dict(
            bootstrap_servers=[Conf.getWareHouseAddr()],
            group_id="cnlab",
            auto_commit_enable=True,
            auto_commit_interval_ms=30 * 1000,
            auto_offset_reset='smallest',
        )
        self.consumer = KafkaConsumer(self.warehouse, **consumer_settings)
Example #15
0
    def __init__(self, host, topic, **kwargs):
        """Create the handler and its Kafka producer.

        :param host: passed to the producer as ``bootstrap_servers``.
        :param topic: stored as ``kafka_topic_name``.
        :param kwargs: extra producer options. An optional ``key`` entry is
            stored on the handler; when it is truthy a ``KeyedProducer`` is
            created, otherwise a ``KafkaProducer``.
        """
        logging.Handler.__init__(self)
        self.key = kwargs.get("key", None)
        self.kafka_topic_name = topic

        if not self.key:
            # ``key`` belongs to this handler, not to the producer config;
            # KafkaProducer rejects configuration names it does not know, so
            # a falsy ``key=...`` must not be forwarded to it.
            producer_options = {
                name: value for name, value in kwargs.items() if name != "key"
            }
            self.producer = KafkaProducer(bootstrap_servers=host,
                                          api_version=(0, 10, 1),
                                          **producer_options)
        else:
            self.producer = KeyedProducer(bootstrap_servers=host,
                                          api_version=(0, 10, 1),
                                          **kwargs)
Example #16
0
    def test_async_keyed_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner,
                                 async=True)

        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        producer.stop()
    def test_round_robin_partitioner(self):
        """Four sends should alternate between the first two partitions."""
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, pid) for pid in partitions
        ]

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner)

        sends = (("key1", "one"), ("key2", "two"),
                 ("key3", "three"), ("key4", "four"))
        responses = [
            producer.send_messages(self.topic, self.key(key_name),
                                   self.msg(text))
            for key_name, text in sends
        ]

        # (index into start_offsets, offset delta) expected for each response.
        expected_slots = ((0, 0), (1, 0), (0, 1), (1, 1))
        for response, (slot, delta) in zip(responses, expected_slots):
            self.assert_produce_response(response, start_offsets[slot] + delta)

        self.assert_fetch_offset(
            partitions[0], start_offsets[0],
            [self.msg("one"), self.msg("three")])
        self.assert_fetch_offset(
            partitions[1], start_offsets[1],
            [self.msg("two"), self.msg("four")])

        producer.stop()
    def test_keyedproducer_null_payload(self):
        """``None`` payloads are produced and fetched back like real messages.

        Four keyed sends (two of them with a ``None`` payload) go through a
        round-robin producer; each response is checked right after its send,
        then both partitions are fetched and compared.
        """
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner)

        # Each response is asserted before the next send, so the produce
        # order below is significant.
        resp = producer.send_messages(self.topic, self.key("key1"),
                                      self.msg("one"))
        self.assert_produce_response(resp, start_offsets[0])
        resp = producer.send_messages(self.topic, self.key("key2"), None)
        self.assert_produce_response(resp, start_offsets[1])
        resp = producer.send_messages(self.topic, self.key("key3"), None)
        self.assert_produce_response(resp, start_offsets[0] + 1)
        resp = producer.send_messages(self.topic, self.key("key4"),
                                      self.msg("four"))
        self.assert_produce_response(resp, start_offsets[1] + 1)

        self.assert_fetch_offset(partitions[0], start_offsets[0],
                                 [self.msg("one"), None])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 [None, self.msg("four")])

        producer.stop()
Example #19
0
class KeyedProducer(BaseStreamProducer):
    """Stream producer that lazily wraps a ``KafkaKeyedProducer``."""

    def __init__(self, connection, topic_done, partitioner_cls, codec):
        """Store the settings; the underlying producer is created on demand."""
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls
        self._codec = codec

    def _connect_producer(self):
        """Create the underlying producer if needed; return True when ready."""
        if self._prod is not None:
            return True
        try:
            self._prod = KafkaKeyedProducer(
                self._conn,
                partitioner=self._partitioner_cls,
                codec=self._codec)
        except BrokerResponseError:
            self._prod = None
            logger.warning("Could not connect producer to Kafka server")
            return False
        return True

    def send(self, key, *messages):
        """Send ``messages`` under ``key``; return True on success.

        Up to five attempts are made, sleeping one second after each
        ``BrokerResponseError``. A ``MessageSizeTooLargeError`` is logged and
        ends the attempts immediately.
        """
        if not self._connect_producer():
            return False

        max_tries = 5
        for attempt in range(1, max_tries + 1):
            try:
                self._prod.send_messages(self._topic_done, key, *messages)
            except MessageSizeTooLargeError as e:
                logger.error(str(e))
                return False
            except BrokerResponseError:
                logger.warning(
                    "Could not send message. Try {0}/{1}".format(
                        attempt, max_tries))
                sleep(1.0)
            else:
                return True
        return False

    def flush(self):
        """Stop the underlying producer if one was created."""
        if self._prod is None:
            return
        self._prod.stop()

    def get_offset(self, partition_id):
        """Always raise ``KeyError``: offsets are not tracked here."""
        # Kafka has its own offset management
        raise KeyError
Example #20
0
def main():
  """Parse the command line, then sniff packets locally or into Kafka.

  Prints the help text and returns when the arguments are invalid. With
  ``args.local`` set, packets go to ``local_out``; otherwise a Kafka producer
  is created from the ZooKeeper broker list and packets go to
  ``produce_callback``.
  """
  # the sniff callback only takes one parameter which is the packet
  # so everything else must be global
  global producer, packet_count, topic, debug

  parser = make_parser()
  args = parser.parse_args()

  if not valid_args(args):
    parser.print_help()
    return

  if args.local:
    sniff(iface=args.interface, store=0, prn=local_out)
    return

  zk = KazooClient(args.zookeeper)
  zk.start()

  kafka = KafkaClient(zk_broker_list(zk))

  producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)
  packet_count = 0
  topic = args.topic
  debug = args.debug

  sniff(iface=args.interface, store=0, prn=produce_callback)
Example #21
0
def main():
    """Wire Kafka producers and a consumer into ``oper``.

    Builds a send callback (which notifies the ``dx`` topic and, for roughly
    30% of calls, also the ``ui`` topic) and a receive callback (next consumed
    message, or ``None`` on timeout), then passes both to ``oper``.
    """
    ta3Host = '127.0.0.1'
    toP, toDX, toUI = b'prioritizer', b'dx', b'ui'

    kafkaServer = ta3Host + ':9092'
    kafka = KafkaClient(kafkaServer)
    producer = SimpleProducer(kafka)
    uiProducer = KeyedProducer(kafka)
    consumer = KafkaConsumer(
        toP, bootstrap_servers=[kafkaServer], consumer_timeout_ms=20)

    def sendMsg(m):
        """Forward segment ``m``; flag it to the UI when the roll exceeds 70."""
        roll = random.randint(1, 100)
        if roll > 70:
            print("Segment #" + m +
                  " is highly suspect. Immediately notify user.")
            uiProducer.send_messages(toUI, b'fromPR', m)
        print("Notify diagnostic engine of segment #" + m + "...")
        producer.send_messages(toDX, m)

    def recvMsg():
        """Return the next consumed message, or ``None`` on timeout."""
        try:
            return consumer.next()
        except ConsumerTimeout:
            return None

    oper(sendMsg, recvMsg)
    def test_async_keyed_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)

        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
          time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])

        producer.stop()
    def _connect_producer(self):
        """If producer is not connected try to connect it now.

        :returns: bool -- True if producer is connected
        """        
        if self._prod is None:
            try:
                self._prod = KeyedProducer(self._conn, partitioner=FingerprintPartitioner, codec=CODEC_SNAPPY)
            except BrokerResponseError:
                self._prod = None        
                if self._manager is not None:
                    self._manager.logger.backend.warning(
                        "Could not connect producer to Kafka server")
                return False

        return True
    def test_async_keyed_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)

        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
          time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])

        producer.stop()
Example #25
0
def keyedProducerTest2():
    '''Manual test for KeyedProducer.

    @topic: single-replica case (JOB_TEST_1)
    @function: test KeyedProducer by publishing messages to the given broker,
        and check the error raised once devops-dev1:9193 has been shut down
    '''
    import pdb
    pdb.set_trace()
    producer = KeyedProducer(KafkaClient('devops-dev1:9193'))
    prefix = "This is a test-"
    counter = 0
    # Publishes one numbered message per second until interrupted.
    while True:
        producer.send_messages(b'JOB_TEST_1', 'keys', prefix + str(counter))
        counter += 1
        time.sleep(1)
Example #26
0
 def producer(self):
     """Return the cached producer, creating it on first use when possible.

     No producer is created while ``self._has_error`` is set; in that case
     whatever ``self._producer`` currently holds is returned.
     """
     needs_creation = self._producer is None and not self._has_error
     if needs_creation:
         if self.kafka is None:
             # if self.kafka is None then we should be in an error state
             assert self._has_error
         else:
             # NOTE(review): the check reads ``self.kafka`` but the producer
             # is built from ``self._kafka`` -- presumably the former is a
             # property over the latter; confirm against the class.
             self._producer = KeyedProducer(self._kafka)
     return self._producer
Example #27
0
    def test_keyedproducer_message_types(self):
        """Only bytes and ``None`` payloads are accepted by send_messages."""
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]
        producer = KeyedProducer(client)
        topic, key = b"test-topic", b"testkey"

        rejected_payloads = (
            u"你怎么样?",
            12,
            ["a", "list"],
            ("a", "tuple"),
            {"a": "dict"},
        )
        for payload in rejected_payloads:
            with self.assertRaises(TypeError):
                logging.debug("attempting to send message of type %s",
                              type(payload))
                producer.send_messages(topic, key, payload)

        accepted_payloads = (b"a string!", None)
        for payload in accepted_payloads:
            # This should not raise an exception
            producer.send_messages(topic, key, payload)
Example #28
0
def keyed_messages():
    """Show KeyedProducer with default, Murmur2 and round-robin partitioners.

    Two messages are sent with the default partitioner; the other two
    producers are only constructed.
    """
    from kafka import KafkaClient, KeyedProducer
    from kafka import Murmur2Partitioner, RoundRobinPartitioner

    client = KafkaClient(KAFKA_SERVER)

    # HashedPartitioner is default (currently uses python hash())
    producer = KeyedProducer(client)
    for key, payload in ((b'key1', b'some message'),
                         (b'key2', b'this methode')):
        producer.send_messages(b'topic1', key, payload)

    # Murmur2Partitioner attempts to mirror the java client hashing
    producer = KeyedProducer(client, partitioner=Murmur2Partitioner)

    # Or just produce round-robin (or just use SimpleProducer)
    producer = KeyedProducer(client, partitioner=RoundRobinPartitioner)
def keyedProducerTest2():
    '''Manual test for KeyedProducer.

    @topic: single-replica case (JOB_TEST_1)
    @function: test KeyedProducer by publishing messages to the given broker,
        and check the error raised once devops-dev1:9193 has been shut down
    '''
    import pdb
    pdb.set_trace()
    client = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(client)
    base_text = "This is a test-"
    sent = 0
    # Runs until interrupted, sending one numbered message every second.
    while True:
        payload = base_text + str(sent)
        producer.send_messages(b'JOB_TEST_1', 'keys', payload)
        sent += 1
        time.sleep(1)
Example #30
0
class KeyedProducer(BaseStreamProducer):
    """Keyed stream producer backed by a lazily created Kafka producer."""

    def __init__(self, connection, topic_done, partitioner_cls, codec):
        """Keep the connection settings; no producer is created yet."""
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls
        self._codec = codec

    def _connect_producer(self):
        """Ensure ``self._prod`` exists; return False if creating it failed."""
        if self._prod is not None:
            return True
        try:
            self._prod = KafkaKeyedProducer(
                self._conn,
                partitioner=self._partitioner_cls,
                codec=self._codec,
            )
        except BrokerResponseError:
            self._prod = None
            logger.warning("Could not connect producer to Kafka server")
            return False
        return True

    def send(self, key, *messages):
        """Try up to five times to send ``messages``; report success.

        Each ``BrokerResponseError`` is logged and followed by a one second
        sleep. ``MessageSizeTooLargeError`` is logged as an error and stops
        the retries at once.
        """
        if not self._connect_producer():
            return False

        max_tries = 5
        n_tries = 0
        while n_tries < max_tries:
            try:
                self._prod.send_messages(self._topic_done, key, *messages)
                return True
            except MessageSizeTooLargeError as e:
                logger.error(str(e))
                return False
            except BrokerResponseError:
                n_tries += 1
                logger.warning(
                    "Could not send message. Try {0}/{1}".format(
                        n_tries, max_tries)
                )
                sleep(1.0)
        return False

    def flush(self):
        """Stop the wrapped producer when there is one."""
        if self._prod is None:
            return
        self._prod.stop()

    def get_offset(self, partition_id):
        """Unsupported here; always raises ``KeyError``."""
        # Kafka has its own offset management
        raise KeyError
Example #31
0
def waitBrokerRecover(kafkaClient):
    '''Try once to build a KeyedProducer for ``kafkaClient``.

    Returns the producer on success. On ``KafkaUnavailableError`` it sleeps
    10 seconds and returns ``None``; no retry is made here.
    '''
    try:
        return KeyedProducer(kafkaClient)
    except KafkaUnavailableError:
        time.sleep(10)
        return None
Example #32
0
    def kafkaTasks(self, addr, topic, tasks):
        """Pickle every task and send it to ``topic`` on the broker at ``addr``.

        Each message is keyed with ``self.manager.name``. The client is
        closed afterwards, whether or not sending succeeded.

        :param addr: address handed to ``KafkaClient``.
        :param topic: destination topic.
        :param tasks: iterable of picklable task objects.
        :raises Exception: when the ``kafka`` package cannot be imported.
        """
        try:
            from kafka import KafkaClient, KeyedProducer
        except ImportError:
            # Only a failed import means "not installed"; anything else
            # (including KeyboardInterrupt) must not be masked.
            logger.error("kafka-python is not installed")
            raise Exception("kafka-python is not installed")

        kafka_client = None
        try:
            kafka_client = KafkaClient(addr)
            producer = KeyedProducer(kafka_client)

            for task in tasks:
                producer.send_messages(topic, self.manager.name,
                                       cPickle.dumps(task))
        finally:
            if kafka_client is not None:
                kafka_client.close()
Example #33
0
    def test_hashed_partitioner(self):
        """Integer keys 1-4 are spread over partitions 0 and 1 as expected."""
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        sends = ((1, "one"), (2, "two"), (3, "three"), (3, "four"),
                 (4, "five"))
        responses = [
            producer.send(self.topic, key, self.msg(text))
            for key, text in sends
        ]

        # Odd keys are expected on partition 1, even keys on partition 0.
        expected_offsets = (
            start_offset1 + 0,
            start_offset0 + 0,
            start_offset1 + 1,
            start_offset1 + 2,
            start_offset0 + 1,
        )
        for response, expected in zip(responses, expected_offsets):
            self.assert_produce_response(response, expected)

        self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("two"), self.msg("five")])
        self.assert_fetch_offset(
            1, start_offset1,
            [self.msg("one"), self.msg("three"), self.msg("four")])

        producer.stop()
Example #34
0
    def test_hashed_partitioner(self):
        """Messages land on the partition selected by ``hash(key) % 2``."""
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, pid) for pid in partitions
        ]

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        key_names = ["1", "2", "3", "3", "4"]
        texts = ["one", "two", "three", "four", "five"]
        resps = [
            producer.send_messages(self.topic, self.key(name), self.msg(text))
            for name, text in zip(key_names, texts)
        ]

        # Running expected offset and expected messages per partition.
        offsets = {partitions[i]: start_offsets[i] for i in (0, 1)}
        messages = {partitions[i]: [] for i in (0, 1)}

        keys = [self.key(name) for name in key_names]
        msgs = [self.msg(text) for text in texts]

        for key, resp, msg in zip(keys, resps, msgs):
            partition = partitions[hash(key) % 2]
            self.assert_produce_response(resp, offsets[partition])
            offsets[partition] += 1
            messages[partition].append(msg)

        for i in (0, 1):
            self.assert_fetch_offset(
                partitions[i], start_offsets[i], messages[partitions[i]])

        producer.stop()
Example #35
0
    def test_hashed_partitioner(self):
        """Each message is expected on partition ``hash(key) % 2``."""
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        key_names = ["1", "2", "3", "3", "4"]
        texts = ["one", "two", "three", "four", "five"]
        resps = [
            producer.send(self.topic, self.key(name), self.msg(text))
            for name, text in zip(key_names, texts)
        ]

        # Running expected offset and expected messages per partition.
        offsets = {0: start_offset0, 1: start_offset1}
        messages = {0: [], 1: []}

        keys = [self.key(name) for name in key_names]
        msgs = [self.msg(text) for text in texts]

        for key, resp, msg in zip(keys, resps, msgs):
            partition = hash(key) % 2
            self.assert_produce_response(resp, offsets[partition])
            offsets[partition] += 1
            messages[partition].append(msg)

        self.assert_fetch_offset(0, start_offset0, messages[0])
        self.assert_fetch_offset(1, start_offset1, messages[1])

        producer.stop()
Example #36
0
 def _connect_producer(self):
     if self._prod is None:
         try:
             self._prod = KafkaKeyedProducer(self._conn, partitioner=self._partitioner_cls, codec=CODEC_SNAPPY)
         except BrokerResponseError:
             self._prod = None
             logger.warning("Could not connect producer to Kafka server")
             return False
     return True
Example #37
0
    def test_keyedproducer_message_types(self):
        """send_messages raises TypeError for anything but bytes or None."""
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]
        producer = KeyedProducer(client)
        topic, key = b"test-topic", b"testkey"

        invalid = (
            u'你怎么样?',
            12,
            ['a', 'list'],
            ('a', 'tuple'),
            {'a': 'dict'},
        )
        for candidate in invalid:
            with self.assertRaises(TypeError):
                logging.debug("attempting to send message of type %s",
                              type(candidate))
                producer.send_messages(topic, key, candidate)

        valid = (b'a string!', None)
        for candidate in valid:
            # This should not raise an exception
            producer.send_messages(topic, key, candidate)
Example #38
0
    def kafkaTasks(self, addr, topic, tasks):
        """Send each task, pickled, to ``topic`` via the broker at ``addr``.

        Messages are keyed with ``self.manager.name``. The client is always
        closed before returning or propagating an error.

        :param addr: address handed to ``KafkaClient``.
        :param topic: destination topic.
        :param tasks: iterable of picklable task objects.
        :raises Exception: when the ``kafka`` package cannot be imported.
        """
        try:
            from kafka import KafkaClient, KeyedProducer
        except ImportError:
            # A bare ``except`` would also relabel unrelated failures (and
            # KeyboardInterrupt) as "not installed".
            logger.error("kafka-python is not installed")
            raise Exception("kafka-python is not installed")

        kafka_client = None
        try:
            kafka_client = KafkaClient(addr)
            producer = KeyedProducer(kafka_client)

            for task in tasks:
                producer.send_messages(topic, self.manager.name,
                                       cPickle.dumps(task))
        finally:
            if kafka_client is not None:
                kafka_client.close()
Example #39
0
 def _connect_producer(self):
     if self._prod is None:
         try:
             self._prod = KafkaKeyedProducer(self._conn, partitioner=self._partitioner_cls, codec=CODEC_SNAPPY)
         except BrokerResponseError:
             self._prod = None
             logger.warning("Could not connect producer to Kafka server")
             return False
     return True
Example #40
0
class KafkaBolt(Bolt):
    """Bolt that forwards report records to the ``sanitised`` Kafka topic."""

    def initialize(self, stormconf, ctx):
        """Create the Kafka client plus a keyed and a simple producer."""
        self.kafka_client = KafkaClient(config['kafka']['hosts'])
        self.keyed_producer = KeyedProducer(self.kafka_client)
        self.simple_producer = SimpleProducer(self.kafka_client)

    def process(self, tup):
        """Send one record, prefixed by a type marker, keyed by report id.

        ``tup.values`` must unpack to ``(report_id, record_type,
        report_data)``. The payload is the stringified data prefixed with
        ``"e"``, ``"h"`` or ``"f"`` for the record types ``"entry"``,
        ``"header"`` and ``"footer"``.

        :raises ValueError: for any other ``record_type``.
        """
        report_id, record_type, report_data = tup.values
        self.log('Processing: %s' % report_id)
        json_data = str(report_data)
        report_id = str(report_id)
        topic = str("sanitised")
        if record_type == "entry":
            payload = str("e" + json_data)
        elif record_type == "header":
            payload = str("h" + json_data)
        elif record_type == "footer":
            payload = str("f" + json_data)
        else:
            # Without this branch ``payload`` would be unbound below and the
            # failure would surface as an opaque UnboundLocalError.
            raise ValueError("Unknown record type: %r" % (record_type,))
        self.keyed_producer.send(topic, report_id, payload)
Example #41
0
    def __init__(self, warehouse, warehouse_result):
        """Keep both topic names and set up client, producer and consumer.

        :param warehouse: topic the consumer is created for.
        :param warehouse_result: stored on the instance for later use.
        """
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result

        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)

        consumer_kwargs = {
            "bootstrap_servers": [Conf.getWareHouseAddr()],
            "group_id": "cnlab",
            "auto_commit_enable": True,
            "auto_commit_interval_ms": 30 * 1000,
            "auto_offset_reset": 'smallest',
        }
        self.consumer = KafkaConsumer(self.warehouse, **consumer_kwargs)
    def test_hashed_partitioner(self):
        """Produced messages are found on the partition ``hash(key) % 2``."""
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, pid) for pid in partitions
        ]

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        plan = (("1", "one"), ("2", "two"), ("3", "three"), ("3", "four"),
                ("4", "five"))
        resps = [
            producer.send_messages(self.topic, self.key(name), self.msg(text))
            for name, text in plan
        ]

        # Running expected offset and expected messages per partition.
        offsets = {partitions[0]: start_offsets[0],
                   partitions[1]: start_offsets[1]}
        messages = {partitions[0]: [], partitions[1]: []}

        keys = [self.key(name) for name, _ in plan]
        msgs = [self.msg(text) for _, text in plan]

        for key, resp, msg in zip(keys, resps, msgs):
            target = partitions[hash(key) % 2]
            self.assert_produce_response(resp, offsets[target])
            offsets[target] += 1
            messages[target].append(msg)

        self.assert_fetch_offset(
            partitions[0], start_offsets[0], messages[partitions[0]])
        self.assert_fetch_offset(
            partitions[1], start_offsets[1], messages[partitions[1]])

        producer.stop()
Example #43
0
def init():
    """Stream tweets from the Twitter filter endpoint into a Kafka topic.

    Relies on names defined outside this function: ``config`` (read via
    ``get``/``getint``/``getboolean``), ``token``, ``kafka_client`` and
    ``json``.  The producer and the Kafka client are always shut down when
    streaming ends, whether it finished normally or raised.

    A negative ``max_stream_responses`` setting selects unlimited streaming;
    otherwise at most that many responses are forwarded.
    """
    oauth_client = Oauth(config.get('oauth', 'consumer_key'),
                         config.get('oauth', 'consumer_secret'),
                         config.get('oauth', 'request_token_url'),
                         config.get('oauth', 'access_token_url'),
                         config.get('oauth', 'authorize_url'),
                         version=config.get('oauth', 'version'))

    request = Request(url=config.get('twitter', 'streaming_filter_url'),
                      method="POST",
                      is_streaming=True,
                      headers={'Accept-Encoding': 'deflate, gzip '},
                      payload={'locations': '-118.39,30.41,-59.61,49.46'},
                      token=token)

    max_stream = int(config.get('twitter', 'max_stream_responses'))
    topic = config.get('kafka', 'topic')
    max_skip_invalid_responses = config.getint('twitter', 'max_skip_invalid_response')
    skip_invalid_responses = config.getboolean('twitter', 'skip_invalid')

    # `async` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument no longer parses.  Passing it through dict
    # unpacking sends the same option to the producer on every interpreter.
    producer = KeyedProducer(kafka_client, **{'async': True})

    twitter = TwitterStream(oauth_client, json)
    tweets = twitter.get_tweets(request)

    # Starts here.
    try:
        if max_stream < 0:
            send_unlimited_messages(tweets, producer, topic)
        else:
            send_limited_messages(max_stream,
                                  tweets,
                                  producer,
                                  topic,
                                  skip_invalid_responses,
                                  max_skip_invalid_responses)
    except Exception as e:
        # Best-effort top-level boundary: report and fall through to cleanup.
        # Single-argument call form prints identically on Python 2 and 3.
        print(e)
    finally:
        producer.stop()
        kafka_client.close()
    def test_hashed_partitioner(self):
        """Send five keyed messages with HashedPartitioner and verify placement.

        Partitions 0 and 1 are hard-coded; the expected partition of each key
        is recomputed here as ``hash(key) % 2``.
        """
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

        # (key, payload) pairs in send order; key "3" is used twice on purpose.
        pairs = [("1", "one"), ("2", "two"), ("3", "three"), ("3", "four"), ("4", "five")]
        resps = [
            producer.send(self.topic, self.key(k), self.msg(m))
            for k, m in pairs
        ]

        offsets = {0: start_offset0, 1: start_offset1}
        messages = {0: [], 1: []}

        keys = [self.key(k) for k, _ in pairs]
        msgs = [self.msg(m) for _, m in pairs]

        # Walk the messages in send order, advancing the expected offset of
        # whichever partition each key hashes to.
        for key, resp, msg in zip(keys, resps, msgs):
            bucket = hash(key) % 2
            self.assert_produce_response(resp, offsets[bucket])
            offsets[bucket] += 1
            messages[bucket].append(msg)

        self.assert_fetch_offset(0, start_offset0, messages[0])
        self.assert_fetch_offset(1, start_offset1, messages[1])

        producer.stop()
Example #45
0
class KafkaLoggingHandler(logging.Handler):
    """Logging handler that publishes formatted log records to Kafka.

    When ``key`` is ``None`` a ``SimpleProducer`` is used and records are
    sent with ``send_messages(msg)``; otherwise a ``KeyedProducer`` is used
    and records are sent with ``send(key, msg)``.
    """

    def __init__(self, host, port, topic, key=None):
        """Connect to the broker at ``host``/``port`` and build the producer.

        :param host: broker host passed to ``KafkaClient``
        :param port: broker port passed to ``KafkaClient``
        :param topic: topic handed to the producer constructor
        :param key: optional message key; selects the keyed producer
        """
        logging.Handler.__init__(self)
        self.kafka_client = KafkaClient(host, port)
        self.key = key
        if key is None:
            self.producer = SimpleProducer(self.kafka_client, topic)
        else:
            self.producer = KeyedProducer(self.kafka_client, topic)

    def emit(self, record):
        """Format ``record`` and publish it, ignoring the Kafka client's own logs."""
        # Drop records from the 'kafka' logger *and* its children
        # ('kafka.client', 'kafka.conn', ...): publishing them would make the
        # client log again while sending, recursing without end.
        if record.name == 'kafka' or record.name.startswith('kafka.'):
            return
        try:
            # use default formatting
            msg = self.format(record)
            # produce message
            if self.key is None:
                self.producer.send_messages(msg)
            else:
                self.producer.send(self.key, msg)
        except Exception:
            # Standard Handler convention: report the failure through
            # handleError (honours logging.raiseExceptions) and let
            # KeyboardInterrupt / SystemExit propagate.
            self.handleError(record)

    def close(self):
        """Stop the producer, then run the base-class close."""
        try:
            self.producer.stop()
        finally:
            # Always deregister the handler, even if the producer fails to stop.
            logging.Handler.close(self)
Example #46
0
    def test_keyedproducer_message_types(self):
        """KeyedProducer must reject non-bytes payloads and accept bytes or None."""
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]
        producer = KeyedProducer(client)
        topic, key = b"test-topic", b"testkey"

        # Unicode text, numbers and containers are all expected to raise.
        rejected = (
            u'你怎么样?',
            12,
            ['a', 'list'],
            ('a', 'tuple'),
            {'a': 'dict'},
        )
        for payload in rejected:
            with self.assertRaises(TypeError):
                logging.debug("attempting to send message of type %s", type(payload))
                producer.send_messages(topic, key, payload)

        # Byte strings and None must go through without an exception.
        for payload in (b'a string!', None):
            producer.send_messages(topic, key, payload)
def keyedProducerTest3():
    '''Manual test of KeyedProducer recovery after a broker outage.

    @topic: single-replica case (JOB_TEST_1)
    @function: test KeyedProducer by publishing messages to the specified
        broker, and verify recovery from the errors raised after
        develops-dev1:9193 is shut down
        (wait 10 seconds; no restart needed, it re-associates automatically)

    Runs forever: one message per second, retrying the same message index
    after a 10 second pause whenever a send fails.
    '''
    # NOTE(review): interactive breakpoint kept from the original; this
    # function stops in the debugger before doing anything.
    import pdb
    pdb.set_trace()
    kafkaClient = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(kafkaClient)
    message = "This is a test-"
    index = 0
    while True:
        try:
            tmpmsg = message + str(index)
            producer.send_messages(b'JOB_TEST_1', 'keys', tmpmsg)
            index += 1
            time.sleep(1)
        except (FailedPayloadsError, KafkaUnavailableError) as msg:
            # `except ..., msg` and the print statement only parse on
            # Python 2; this form produces the same output on 2 and 3.
            print('Occur FailedPayloadsError error, msg: %s' % (msg,))
            time.sleep(10)
Example #48
0
class DockerExecutor(object):
    """Consume pickled tasks from one Kafka topic and publish their results to another."""

    def __init__(self,warehouse,warehouse_result):
        """Set up the Kafka client, keyed producer and consumer.

        :param warehouse: topic the consumer reads tasks from
        :param warehouse_result: topic task results are published to
        """
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result

        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)

        consumer_options = {
            'bootstrap_servers': [Conf.getWareHouseAddr()],
            'group_id': "cnlab",
            'auto_commit_enable': True,
            'auto_commit_interval_ms': 30 * 1000,
            'auto_offset_reset': 'smallest',
        }
        self.consumer = KafkaConsumer(self.warehouse, **consumer_options)

    def run(self):
        """Process fetched messages one by one: unpickle, run, publish the result."""
        # `seen` is a 1-based running count used only in the debug line.
        for seen, message in enumerate(self.consumer.fetch_messages(), 1):
            logger.debug("%d,%s:%s:%s: key=%s " % (
                seen, message.topic, message.partition, message.offset,
                message.key))
            # NOTE(review): unpickling message payloads executes whatever the
            # sender encoded; only safe if every producer on this topic is trusted.
            task = cPickle.loads(message.value)
            outcome = task.run(0)
            self.producer.send_messages(self.warehouse_result, task.id,
                                        cPickle.dumps(outcome))
def main():
    """Interactively send keyed messages to the 'test' topic on localhost:9092.

    Each non-empty line typed by the user is sent with key 'topic-key'.
    Entering exactly ``q`` or ``Q`` exits the process with status 0; an empty
    line only prints a notice.
    """
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # To send messages synchronously
    kafka = KafkaClient('localhost:9092')
    producer = KeyedProducer(kafka)

    # Insure that topic exists
    kafka.ensure_topic_exists('test')

    while True:
        input_str = read_line("Press enter to send another message, otherwise press 'q' to quit: ")

        # Exact match: the previous substring test (`in "qQ"`) also treated
        # the two-character input "qQ" as a quit command.
        if input_str in ("q", "Q"):
            sys.exit(0)

        if not input_str:
            # Call form prints the same text on Python 2 and 3.
            print("No input was provided")
            continue

        producer.send_messages(
            'test',  # topic
            'topic-key',  # key
            "(time: {}, message: {})".format(get_time(), input_str),  # message
        )
Example #50
0
def keyedProducerTest3():
    '''Manual test of KeyedProducer recovery after a broker outage.

    @topic: single-replica case (JOB_TEST_1)
    @function: test KeyedProducer by publishing messages to the specified
        broker, and verify recovery from the errors raised after
        develops-dev1:9193 is shut down
        (wait 10 seconds; no restart needed, it re-associates automatically)
    '''
    import pdb
    pdb.set_trace()
    producer = KeyedProducer(KafkaClient('devops-dev1:9193'))
    prefix = "This is a test-"
    sent = 0
    # Never terminates: one message per second; a failed send is retried
    # with the same counter value after a 10 second pause.
    while True:
        try:
            producer.send_messages(b'JOB_TEST_1', 'keys', prefix + str(sent))
        except (FailedPayloadsError, KafkaUnavailableError) as msg:
            print('Occur FailedPayloadsError error, msg:', msg)
            time.sleep(10)
        else:
            sent += 1
            time.sleep(1)
Example #51
0
class KeyedProducer(BaseStreamProducer):
    """Lazily connected, retrying wrapper around ``KafkaKeyedProducer``.

    The underlying producer is created on first use with the configured
    partitioner class and snappy compression.
    """

    def __init__(self, connection, topic_done, partitioner_cls):
        """Store the settings; no producer is created until the first send.

        :param connection: Kafka connection handed to ``KafkaKeyedProducer``
        :param topic_done: topic every message is sent to
        :param partitioner_cls: partitioner passed to ``KafkaKeyedProducer``
        """
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls

    def _connect_producer(self):
        """Create the underlying producer if needed.

        :returns: bool -- True if a producer is available, False if creating
            it raised ``BrokerResponseError``
        """
        if self._prod is None:
            try:
                self._prod = KafkaKeyedProducer(
                    self._conn,
                    partitioner=self._partitioner_cls,
                    codec=CODEC_SNAPPY)
            except BrokerResponseError:
                self._prod = None
                logger.warning("Could not connect producer to Kafka server")
                return False
        return True

    def send(self, key, *messages):
        """Send ``messages`` under ``key``, trying up to five times.

        An oversized message is logged and abandoned immediately; any other
        ``BrokerResponseError`` is retried after a one second pause.  Nothing
        is returned and no exception from these two families propagates.
        """
        max_tries = 5
        if not self._connect_producer():
            return
        for attempt in range(1, max_tries + 1):
            try:
                self._prod.send_messages(self._topic_done, key, *messages)
                return
            # `except X as e` replaces the Python 2-only `except X, e`,
            # which does not parse on Python 3.
            except MessageSizeTooLargeError as e:
                # Retrying cannot help: the payload itself is too large.
                logger.error(str(e))
                return
            except BrokerResponseError:
                logger.warning(
                    "Could not send message. Try {0}/{1}".format(
                        attempt, max_tries))
                sleep(1.0)
Example #52
0
class KeyedProducer(BaseStreamProducer):
    """Keyed stream producer that connects on demand and retries failed sends."""

    def __init__(self, connection, topic_done, partitioner_cls):
        """Remember the connection, target topic and partitioner class.

        The real producer (``self._prod``) stays ``None`` until it is first
        needed.
        """
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls

    def _connect_producer(self):
        """Ensure ``self._prod`` exists.

        :returns: bool -- False when creating the producer raised
            ``BrokerResponseError`` (a warning is logged), True otherwise
        """
        if self._prod is None:
            try:
                self._prod = KafkaKeyedProducer(self._conn, partitioner=self._partitioner_cls, codec=CODEC_SNAPPY)
            except BrokerResponseError:
                self._prod = None
                logger.warning("Could not connect producer to Kafka server")
                return False
        return True

    def send(self, key, *messages):
        """Send ``messages`` with ``key`` to the configured topic.

        Up to five attempts are made, one second apart, when the broker
        answers with an error.  A message that is too large is logged as an
        error and dropped without retrying.
        """
        success = False
        max_tries = 5
        if self._connect_producer():
            n_tries = 0
            while not success and n_tries < max_tries:
                try:
                    self._prod.send_messages(self._topic_done, key, *messages)
                    success = True
                # Python 3 rejects the old `except X, e` spelling; `as` works
                # on both Python 2.6+ and Python 3.
                except MessageSizeTooLargeError as e:
                    logger.error(str(e))
                    break
                except BrokerResponseError:
                    n_tries += 1
                    logger.warning(
                        "Could not send message. Try {0}/{1}".format(
                            n_tries, max_tries)
                    )
                    sleep(1.0)
Example #53
0
    def _connect_producer(self):
        """Make sure a producer exists, creating one on first use.

        A new ``KeyedProducer`` is built with ``FingerprintPartitioner`` and
        snappy compression.  If that raises ``BrokerResponseError`` the
        producer stays unset and, when a manager is attached, a warning is
        written to its backend logger.

        :returns: bool -- True if producer is connected
        """
        # Fast path: already connected.
        if self._prod is not None:
            return True

        try:
            self._prod = KeyedProducer(
                self._conn,
                partitioner=FingerprintPartitioner,
                codec=CODEC_SNAPPY,
            )
        except BrokerResponseError:
            self._prod = None
            manager = self._manager
            if manager is not None:
                manager.logger.backend.warning(
                    "Could not connect producer to Kafka server")
            return False

        return True
    def test_keyedproducer_null_payload(self):
        """Verify that ``None`` payloads are produced and fetched like real ones.

        Four messages (two of them ``None``) are sent with
        RoundRobinPartitioner; the assertions expect them to alternate between
        the first and second partition, starting with the first.
        """
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
        # (The unused local `key = "test"` was removed; nothing read it.)

        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assert_produce_response(resp, start_offsets[0])
        resp = producer.send_messages(self.topic, self.key("key2"), None)
        self.assert_produce_response(resp, start_offsets[1])
        resp = producer.send_messages(self.topic, self.key("key3"), None)
        self.assert_produce_response(resp, start_offsets[0]+1)
        resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
        self.assert_produce_response(resp, start_offsets[1]+1)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), None ])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [ None, self.msg("four") ])

        producer.stop()
    def test_round_robin_partitioner(self):
        """Four keyed sends must alternate between partitions 0 and 1.

        With RoundRobinPartitioner the assertions expect messages one and
        three on partition 0 and messages two and four on partition 1.
        """
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)

        pairs = [("key1", "one"), ("key2", "two"), ("key3", "three"), ("key4", "four")]
        responses = [
            producer.send(self.topic, self.key(k), self.msg(m))
            for k, m in pairs
        ]

        # Expected produce offsets, in send order.
        expected = [
            start_offset0 + 0,
            start_offset1 + 0,
            start_offset0 + 1,
            start_offset1 + 1,
        ]
        for resp, offset in zip(responses, expected):
            self.assert_produce_response(resp, offset)

        on_partition0 = [self.msg("one"), self.msg("three")]
        self.assert_fetch_offset(0, start_offset0, on_partition0)
        on_partition1 = [self.msg("two"), self.msg("four")]
        self.assert_fetch_offset(1, start_offset1, on_partition1)

        producer.stop()
    def test_round_robin_partitioner(self):
        """Four keyed sends must alternate between the topic's first two partitions.

        Partition ids are looked up from the client; the assertions expect
        messages one and three on the first partition and two and four on the
        second.
        """
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)

        pairs = [("key1", "one"), ("key2", "two"), ("key3", "three"), ("key4", "four")]
        responses = [
            producer.send_messages(self.topic, self.key(k), self.msg(m))
            for k, m in pairs
        ]

        # (index into start_offsets, messages already on that partition),
        # listed in send order.
        expected = [(0, 0), (1, 0), (0, 1), (1, 1)]
        for resp, (slot, already_sent) in zip(responses, expected):
            self.assert_produce_response(resp, start_offsets[slot] + already_sent)

        on_first = [self.msg("one"), self.msg("three")]
        self.assert_fetch_offset(partitions[0], start_offsets[0], on_first)
        on_second = [self.msg("two"), self.msg("four")]
        self.assert_fetch_offset(partitions[1], start_offsets[1], on_second)

        producer.stop()
    def test_switch_leader_keyed_producer(self):
        """A synchronous KeyedProducer must recover after a partition leader dies.

        Sends ten messages, kills the leader of partition 0, then keeps
        sending (for at most 60 seconds) until a message whose key maps to
        partition 0 goes through.  Ten more messages are sent afterwards to
        confirm no further errors occur.
        """
        topic = self.topic

        # `async` is a reserved word from Python 3.7 on, so `async=False`
        # written as a keyword argument no longer parses.  Dict unpacking
        # passes the very same option on every interpreter.
        producer = KeyedProducer(self.client, **{'async': False})

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                # Only a successful send to the partition whose leader was
                # killed counts as recovery.
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                    NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
    def test_hashed_partitioner(self):
        """Integer keys sent through HashedPartitioner land on the expected partition.

        The assertions expect odd keys (1, 3, 3) on partition 1 and even keys
        (2, 4) on partition 0.
        """
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)

        # (key, payload) in send order; key 3 is sent twice.
        pairs = [(1, "one"), (2, "two"), (3, "three"), (3, "four"), (4, "five")]
        responses = [
            producer.send(self.topic, key, self.msg(text))
            for key, text in pairs
        ]

        # Expected produce offsets, in send order.
        expected = [
            start_offset1 + 0,
            start_offset0 + 0,
            start_offset1 + 1,
            start_offset1 + 2,
            start_offset0 + 1,
        ]
        for resp, offset in zip(responses, expected):
            self.assert_produce_response(resp, offset)

        on_partition0 = [self.msg("two"), self.msg("five")]
        self.assert_fetch_offset(0, start_offset0, on_partition0)
        on_partition1 = [self.msg("one"), self.msg("three"), self.msg("four")]
        self.assert_fetch_offset(1, start_offset1, on_partition1)

        producer.stop()