class DockerExecutor(object):

    def __init__(self, warehouse, warehouse_result):
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result
        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)
        self.consumer = KafkaConsumer(self.warehouse,
                                      bootstrap_servers=[Conf.getWareHouseAddr()],
                                      group_id="cnlab",
                                      auto_commit_enable=True,
                                      auto_commit_interval_ms=30 * 1000,
                                      auto_offset_reset='smallest')

    def run(self):
        i = 1
        for message in self.consumer.fetch_messages():
            logger.debug("%d,%s:%s:%s: key=%s" % (i, message.topic, message.partition,
                                                  message.offset, message.key))
            task = cPickle.loads(message.value)
            i += 1
            result = task.run(0)
            self.producer.send_messages(self.warehouse_result, task.id,
                                        cPickle.dumps(result))

def message_sender(m):
    """Send (key, value) pairs from an RDD to a Kafka producer."""
    client = SimpleClient('localhost:9092')
    producer = KeyedProducer(client)
    for d in m.collect():
        # the key must be bytes; str.encode() works on both Python 2 and 3
        # (the original's bytes.encode(str(d[0])) only worked on Python 2)
        producer.send_messages('flask', str(d[0]).encode(), d[1])

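# A hedged usage sketch: message_sender reads like a Spark Streaming callback,
# driven once per micro-batch. The StreamingContext setup, socket source, and
# port below are illustrative assumptions, not part of the original snippet.
from pyspark.streaming import StreamingContext

def wire_up(sc):
    ssc = StreamingContext(sc, batchDuration=5)
    stream = ssc.socketTextStream("localhost", 9999) \
                .map(lambda line: (line[:1], line))  # build (key, value) pairs
    stream.foreachRDD(message_sender)  # one producer round per micro-batch
    return ssc
# Note: creating a SimpleClient per batch is costly; a long-lived
# module-level producer would usually be preferred.
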
class KafkaLoggingHandler(logging.Handler):
    """A logging handler that ships records to Kafka as JSON."""

    def __init__(self, host, topic, **kwargs):
        logging.Handler.__init__(self)
        # pop "key" so it is not forwarded to KafkaProducer below
        self.key = kwargs.pop("key", None)
        self.kafka_topic_name = topic
        # Note: the legacy KeyedProducer does not accept bootstrap_servers;
        # the modern KafkaProducer handles keyed sends itself, so it is used
        # whether or not a key was given.
        self.producer = KafkaProducer(bootstrap_servers=host,
                                      api_version=(0, 10, 1),
                                      **kwargs)

    def emit(self, record):
        # Ignore Kafka's own log records to avoid infinite recursion.
        if 'kafka' in record.name:
            return
        try:
            # Format the record as JSON, encoded as UTF-8.
            # (event_id, hostName and host_ip are module-level context
            # values assumed to be defined elsewhere.)
            message = {
                'eventId': str(event_id),
                'eventChannel': record.name,
                'hostName': hostName,
                'address': host_ip,
                'eventTime': time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime(record.created)),
                'level': record.levelname,
                'message': record.msg,
                'throwableInfo': record.exc_text,
            }
            mess = bytes(json.dumps(message), encoding='utf8')
            # Pass value and key explicitly: KafkaProducer.send takes
            # (topic, value, key), so positional None would shift the value
            # into the key slot.
            self.producer.send(self.kafka_topic_name, value=mess, key=self.key)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

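# For contrast, a minimal keyed send with the modern kafka-python API
# (broker address and topic name are illustrative):
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
# records with the same key land in the same partition
producer.send('app-logs', key=b'service-a', value=b'{"level": "INFO"}')
producer.flush()
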
def sendMsg(topic, lines):
    if lines:
        brokers = '10.117.181.44:9092,10.117.108.143:9092,10.117.21.79:9092'
        kafka = KafkaClient(brokers)
        producer = KeyedProducer(kafka)
        for line in lines:
            # spread messages across partitions with a random key suffix
            ran = "_" + str(random.randint(0, 10))
            producer.send_messages(topic, topic + ran, line)
        producer.stop()

def sendMsg(topic, lines):
    if lines:
        brokers = 'cdh-slave0:9092,cdh-slave1:9092,cdh-slave2:9092'
        kafka = KafkaClient(brokers)
        producer = KeyedProducer(kafka)
        for line in lines:
            # spread messages across partitions with a random key suffix
            ran = "_" + str(random.randint(0, 10))
            producer.send_messages(topic, topic + ran, line)
        producer.stop()

def genData(topic):
    producer = KeyedProducer(kafka)
    while True:
        # the with-block closes the file on each pass (the original also
        # called close() on the path string, which would raise)
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                # Creating some delay to allow proper rendering of the cab
                # locations on the map
                time.sleep(0.1)

def test_async_keyed_producer(self):
    start_offset0 = self.current_offset(self.topic, 0)

    # `async` was the legacy kafka-python kwarg; it became a reserved word
    # in Python 3.7+, so this only runs on older interpreters.
    producer = KeyedProducer(self.client,
                             partitioner=RoundRobinPartitioner,
                             async=True)
    resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
    # async producers return no response payload
    self.assertEqual(len(resp), 0)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

    producer.stop()

def genData(topic):
    producer = KeyedProducer(kafka)
    while True:
        for line in fileinput.input(source_file):
            key = line.split("\t")[0]
            print(key)
            print(line.rstrip())
            producer.send(topic, key, line.rstrip())
            time.sleep(0.1)  # Creating some delay
        fileinput.close()

class NautilusDive(object):

    def __init__(self, config):
        self.brokers = config['brokers']
        self.topic = config['topic']
        self.kafka = KafkaClient(self.brokers)
        if config['partitioner'] is None:
            self.producer = KeyedProducer(self.kafka,
                                          partitioner=RoundRobinPartitioner)
        else:
            self.producer = KeyedProducer(self.kafka,
                                          partitioner=config['partitioner'])

    def send(self, key, message):
        self.producer.send(self.topic, key, message)

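# A hedged usage sketch for the class above (broker address, topic, and
# payload are illustrative; the config keys mirror the constructor):
config = {
    'brokers': 'localhost:9092',
    'topic': 'dive-events',
    'partitioner': None,  # None falls back to RoundRobinPartitioner
}
dive = NautilusDive(config)
dive.send(b'sensor-1', b'depth=42')
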
def keyedProduce(self, topic, key, value):
    kafka = KafkaClient(self.configs["broker_list"].split(","))
    # `async` is the legacy kafka-python kwarg (reserved word in Python 3.7+)
    keyedProducer = KeyedProducer(kafka, async=True)
    undone = True
    while undone:
        try:
            keyedProducer.send_messages(topic, key, value)
            undone = False
        except LeaderNotAvailableError:
            # wait for a new leader to be elected, then retry
            sleep(10)
            print("LeaderNotAvailableError")

def test_round_robin_partitioner(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    resp1 = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
    resp2 = producer.send_messages(self.topic, self.key("key2"), self.msg("two"))
    resp3 = producer.send_messages(self.topic, self.key("key3"), self.msg("three"))
    resp4 = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))

    self.assert_produce_response(resp1, start_offsets[0] + 0)
    self.assert_produce_response(resp2, start_offsets[1] + 0)
    self.assert_produce_response(resp3, start_offsets[0] + 1)
    self.assert_produce_response(resp4, start_offsets[1] + 1)

    self.assert_fetch_offset(partitions[0], start_offsets[0],
                             [self.msg("one"), self.msg("three")])
    self.assert_fetch_offset(partitions[1], start_offsets[1],
                             [self.msg("two"), self.msg("four")])

    producer.stop()

def test_keyedproducer_null_payload(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
    self.assert_produce_response(resp, start_offsets[0])
    # a None payload is a legal "null" message
    resp = producer.send_messages(self.topic, self.key("key2"), None)
    self.assert_produce_response(resp, start_offsets[1])
    resp = producer.send_messages(self.topic, self.key("key3"), None)
    self.assert_produce_response(resp, start_offsets[0] + 1)
    resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
    self.assert_produce_response(resp, start_offsets[1] + 1)

    self.assert_fetch_offset(partitions[0], start_offsets[0],
                             [self.msg("one"), None])
    self.assert_fetch_offset(partitions[1], start_offsets[1],
                             [None, self.msg("four")])

    producer.stop()

class KeyedProducer(BaseStreamProducer):

    def __init__(self, connection, topic_done, partitioner_cls, codec):
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls
        self._codec = codec

    def _connect_producer(self):
        """Connect lazily; return True if a producer is available."""
        if self._prod is None:
            try:
                self._prod = KafkaKeyedProducer(self._conn,
                                                partitioner=self._partitioner_cls,
                                                codec=self._codec)
            except BrokerResponseError:
                self._prod = None
                logger.warning("Could not connect producer to Kafka server")
                return False
        return True

    def send(self, key, *messages):
        success = False
        max_tries = 5
        if self._connect_producer():
            n_tries = 0
            while not success and n_tries < max_tries:
                try:
                    self._prod.send_messages(self._topic_done, key, *messages)
                    success = True
                except MessageSizeTooLargeError as e:
                    # oversized messages will never succeed; give up
                    logger.error(str(e))
                    break
                except BrokerResponseError:
                    n_tries += 1
                    logger.warning("Could not send message. Try {0}/{1}".format(
                        n_tries, max_tries))
                    sleep(1.0)
        return success

    def flush(self):
        if self._prod is not None:
            self._prod.stop()

    def get_offset(self, partition_id):
        # Kafka has its own offset management
        raise KeyError

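# A hedged usage sketch of the wrapper above. The connection object, topic
# name, and key are illustrative; Murmur2Partitioner and CODEC_NONE come
# from legacy kafka-python, assuming that is the version in play:
from kafka.partitioner import Murmur2Partitioner
from kafka.protocol import CODEC_NONE

producer = KeyedProducer(conn, "frontier-done", Murmur2Partitioner, CODEC_NONE)
if not producer.send(b"fingerprint-1", b"payload"):
    logger.error("send failed after retries")
producer.flush()  # stops the underlying producer
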
def main():
    parser = make_parser()
    args = parser.parse_args()
    if not valid_args(args):
        parser.print_help()
        return

    if args.local:
        sniff(iface=args.interface, store=0, prn=local_out)
        return

    zk = KazooClient(args.zookeeper)
    zk.start()
    kafka = KafkaClient(zk_broker_list(zk))

    # The sniff callback only takes one parameter (the packet),
    # so everything else must be global.
    global producer
    producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)
    global packet_count
    packet_count = 0
    global topic
    topic = args.topic
    global debug
    debug = args.debug

    sniff(iface=args.interface, store=0, prn=produce_callback)

def main():
    ta3Host = '127.0.0.1'
    toP = b'prioritizer'
    toDX = b'dx'
    toUI = b'ui'

    kafkaServer = ta3Host + ':9092'
    kafka = KafkaClient(kafkaServer)
    producer = SimpleProducer(kafka)
    uiProducer = KeyedProducer(kafka)
    consumer = KafkaConsumer(toP,
                             bootstrap_servers=[kafkaServer],
                             consumer_timeout_ms=20)

    def sendMsg(m):
        i = random.randint(1, 100)
        if i > 70:
            print("Segment #" + m + " is highly suspect. Immediately notify user.")
            uiProducer.send_messages(toUI, b'fromPR', m)
        print("Notify diagnostic engine of segment #" + m + "...")
        producer.send_messages(toDX, m)

    def recvMsg():
        try:
            return consumer.next()
        except ConsumerTimeout:
            return None

    oper(sendMsg, recvMsg)

def test_async_keyed_producer(self):
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    producer = KeyedProducer(self.client,
                             partitioner=RoundRobinPartitioner,
                             async=True)
    resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
    self.assertEqual(len(resp), 0)

    # wait for the server to report a new highwatermark
    while self.current_offset(self.topic, partition) == start_offset:
        time.sleep(0.1)

    self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

    producer.stop()

def _connect_producer(self):
    """If producer is not connected try to connect it now.

    :returns: bool -- True if producer is connected
    """
    if self._prod is None:
        try:
            self._prod = KeyedProducer(self._conn,
                                       partitioner=FingerprintPartitioner,
                                       codec=CODEC_SNAPPY)
        except BrokerResponseError:
            self._prod = None
            if self._manager is not None:
                self._manager.logger.backend.warning(
                    "Could not connect producer to Kafka server")
            return False
    return True

def keyedProducerTest2():
    """Test KeyedProducer.

    Topic: single-replica case (JOB_TEST_1).
    Purpose: publish messages to the given broker with KeyedProducer and
    verify the error raised after devops-dev1:9193 is shut down.
    """
    import pdb
    pdb.set_trace()
    kafkaClient = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(kafkaClient)
    message = "This is a test-"
    index = 0
    while True:
        tmpmsg = message + str(index)
        # key and payload must be bytes
        producer.send_messages(b'JOB_TEST_1', b'keys', tmpmsg.encode('utf-8'))
        index += 1
        time.sleep(1)

def producer(self):
    if self._producer is None and not self._has_error:
        if self.kafka is not None:
            self._producer = KeyedProducer(self._kafka)
        else:
            # if self.kafka is None then we should be in an error state
            assert self._has_error
    return self._producer

def test_keyedproducer_message_types(self):
    client = MagicMock()
    client.get_partition_ids_for_topic.return_value = [0, 1]
    producer = KeyedProducer(client)
    topic = b"test-topic"
    key = b"testkey"

    bad_data_types = (u"你怎么样?", 12, ["a", "list"], ("a", "tuple"), {"a": "dict"})
    for m in bad_data_types:
        with self.assertRaises(TypeError):
            logging.debug("attempting to send message of type %s", type(m))
            producer.send_messages(topic, key, m)

    good_data_types = (b"a string!", None)
    for m in good_data_types:
        # This should not raise an exception
        producer.send_messages(topic, key, m)

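# The test above shows the legacy producer only accepts bytes (or None).
# A hedged helper sketch for callers holding unicode text; the function
# name is illustrative, not part of any library API:
def send_text(producer, topic, key, text):
    # encode unicode payloads explicitly; bytes and None pass through
    payload = text.encode("utf-8") if isinstance(text, str) else text
    return producer.send_messages(topic, key, payload)
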
def keyed_messages():
    '''Keyed messages'''
    from kafka import (KafkaClient, KeyedProducer,
                       Murmur2Partitioner, RoundRobinPartitioner)

    kafka = KafkaClient(KAFKA_SERVER)

    # HashedPartitioner is default (currently uses python hash())
    producer = KeyedProducer(kafka)
    producer.send_messages(b'topic1', b'key1', b'some message')
    producer.send_messages(b'topic1', b'key2', b'this method')

    # Murmur2Partitioner attempts to mirror the java client hashing
    producer = KeyedProducer(kafka, partitioner=Murmur2Partitioner)

    # Or just produce round-robin (or just use SimpleProducer)
    producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)

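# A hedged sketch of a custom partitioner, assuming the legacy kafka-python
# Partitioner interface (constructed with the partition list; partition()
# returns one of them). The routing rule is purely illustrative:
from kafka.partitioner.base import Partitioner

class FirstBytePartitioner(Partitioner):
    def partition(self, key, partitions=None):
        if partitions is None:
            partitions = self.partitions
        # route on the first byte of the key
        return partitions[ord(key[:1]) % len(partitions)]

producer = KeyedProducer(kafka, partitioner=FirstBytePartitioner)
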
def waitBrokerRecover(kafkaClient):
    '''Wait for the broker to recover; return a producer once it does.'''
    try:
        producer = KeyedProducer(kafkaClient)
    except KafkaUnavailableError:
        # broker still down: back off, then let the caller retry
        time.sleep(10)
    else:
        return producer

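# A hedged usage sketch: the function returns None while the broker is
# unavailable, so callers poll it in a loop until a producer comes back:
producer = None
while producer is None:
    producer = waitBrokerRecover(kafkaClient)
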
def kafkaTasks(self, addr, topic, tasks):
    try:
        from kafka import SimpleProducer, KafkaClient, KeyedProducer
    except ImportError:
        logger.error("kafka-python is not installed")
        raise Exception("kafka-python is not installed")

    kafka_client = None
    try:
        kafka_client = KafkaClient(addr)
        producer = KeyedProducer(kafka_client)
        for task in tasks:
            # key by manager name so one manager's tasks share a partition
            producer.send_messages(topic, self.manager.name, cPickle.dumps(task))
    finally:
        if kafka_client:
            kafka_client.close()

def test_hashed_partitioner(self):
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    resp1 = producer.send(self.topic, 1, self.msg("one"))
    resp2 = producer.send(self.topic, 2, self.msg("two"))
    resp3 = producer.send(self.topic, 3, self.msg("three"))
    resp4 = producer.send(self.topic, 3, self.msg("four"))
    resp5 = producer.send(self.topic, 4, self.msg("five"))

    self.assert_produce_response(resp1, start_offset1 + 0)
    self.assert_produce_response(resp2, start_offset0 + 0)
    self.assert_produce_response(resp3, start_offset1 + 1)
    self.assert_produce_response(resp4, start_offset1 + 2)
    self.assert_produce_response(resp5, start_offset0 + 1)

    self.assert_fetch_offset(0, start_offset0,
                             [self.msg("two"), self.msg("five")])
    self.assert_fetch_offset(1, start_offset1,
                             [self.msg("one"), self.msg("three"), self.msg("four")])

    producer.stop()

def test_hashed_partitioner(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    resp1 = producer.send_messages(self.topic, self.key("1"), self.msg("one"))
    resp2 = producer.send_messages(self.topic, self.key("2"), self.msg("two"))
    resp3 = producer.send_messages(self.topic, self.key("3"), self.msg("three"))
    resp4 = producer.send_messages(self.topic, self.key("3"), self.msg("four"))
    resp5 = producer.send_messages(self.topic, self.key("4"), self.msg("five"))

    offsets = {partitions[0]: start_offsets[0], partitions[1]: start_offsets[1]}
    messages = {partitions[0]: [], partitions[1]: []}

    keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
    resps = [resp1, resp2, resp3, resp4, resp5]
    msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

    for key, resp, msg in zip(keys, resps, msgs):
        k = hash(key) % 2
        partition = partitions[k]
        offset = offsets[partition]
        self.assert_produce_response(resp, offset)
        offsets[partition] += 1
        messages[partition].append(msg)

    self.assert_fetch_offset(partitions[0], start_offsets[0], messages[partitions[0]])
    self.assert_fetch_offset(partitions[1], start_offsets[1], messages[partitions[1]])

    producer.stop()

def test_hashed_partitioner(self):
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
    resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
    resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
    resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
    resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))

    offsets = {0: start_offset0, 1: start_offset1}
    messages = {0: [], 1: []}

    keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
    resps = [resp1, resp2, resp3, resp4, resp5]
    msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

    for key, resp, msg in zip(keys, resps, msgs):
        k = hash(key) % 2
        offset = offsets[k]
        self.assert_produce_response(resp, offset)
        offsets[k] += 1
        messages[k].append(msg)

    self.assert_fetch_offset(0, start_offset0, messages[0])
    self.assert_fetch_offset(1, start_offset1, messages[1])

    producer.stop()

class KafkaBolt(Bolt):

    def initialize(self, stormconf, ctx):
        self.kafka_client = KafkaClient(config['kafka']['hosts'])
        self.keyed_producer = KeyedProducer(self.kafka_client)
        self.simple_producer = SimpleProducer(self.kafka_client)

    def process(self, tup):
        report_id, record_type, report_data = tup.values
        self.log('Processing: %s' % report_id)
        json_data = str(report_data)
        report_id = str(report_id)
        topic = str("sanitised")
        # record_type is expected to be one of entry/header/footer;
        # the payload is prefixed with the type's first letter
        if record_type == "entry":
            payload = str("e" + json_data)
        elif record_type == "header":
            payload = str("h" + json_data)
        elif record_type == "footer":
            payload = str("f" + json_data)
        self.keyed_producer.send(topic, report_id, payload)

def init():
    oauth_client = Oauth(config.get('oauth', 'consumer_key'),
                         config.get('oauth', 'consumer_secret'),
                         config.get('oauth', 'request_token_url'),
                         config.get('oauth', 'access_token_url'),
                         config.get('oauth', 'authorize_url'),
                         version=config.get('oauth', 'version'))
    request = Request(url=config.get('twitter', 'streaming_filter_url'),
                      method="POST",
                      is_streaming=True,
                      headers={'Accept-Encoding': 'deflate, gzip '},
                      payload={'locations': '-118.39,30.41,-59.61,49.46'},
                      token=token)
    max_stream = int(config.get('twitter', 'max_stream_responses'))
    topic = config.get('kafka', 'topic')
    max_skip_invalid_responses = config.getint('twitter', 'max_skip_invalid_response')
    skip_invalid_responses = config.getboolean('twitter', 'skip_invalid')

    # `async` is the legacy kafka-python kwarg (reserved word in Python 3.7+);
    # kafka_client and token are module-level objects set up elsewhere
    producer = KeyedProducer(kafka_client, async=True)
    twitter = TwitterStream(oauth_client, json)
    tweets = twitter.get_tweets(request)

    # Starts here.
    try:
        if max_stream < 0:
            send_unlimited_messages(tweets, producer, topic)
        else:
            send_limited_messages(max_stream, tweets, producer, topic,
                                  skip_invalid_responses,
                                  max_skip_invalid_responses)
    except Exception as e:
        print(e)
    finally:
        producer.stop()
        kafka_client.close()

class KafkaLoggingHandler(logging.Handler):
    # Targets the legacy kafka-python 0.8-style API, where producers were
    # bound to a topic at construction time.

    def __init__(self, host, port, topic, key=None):
        logging.Handler.__init__(self)
        self.kafka_client = KafkaClient(host, port)
        self.key = key
        if key is None:
            self.producer = SimpleProducer(self.kafka_client, topic)
        else:
            self.producer = KeyedProducer(self.kafka_client, topic)

    def emit(self, record):
        # drop kafka logging to avoid infinite recursion
        if record.name == 'kafka':
            return
        try:
            # use default formatting
            msg = self.format(record)
            # produce message
            if self.key is None:
                self.producer.send_messages(msg)
            else:
                self.producer.send(self.key, msg)
        except Exception:
            import traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
            del ei

    def close(self):
        self.producer.stop()
        logging.Handler.close(self)

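# A hedged usage sketch: attach the handler to a logger (host, port, and
# topic are illustrative):
logger = logging.getLogger("myapp")
logger.addHandler(KafkaLoggingHandler("localhost", 9092, "app-logs"))
logger.setLevel(logging.INFO)
logger.info("shipped to Kafka")
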
def main():
    # To send messages synchronously
    kafka = KafkaClient('localhost:9092')
    producer = KeyedProducer(kafka)

    # Ensure that the topic exists
    kafka.ensure_topic_exists('test')

    while True:
        input_str = input("Press enter to send another message, otherwise press 'q' to quit: ")
        if input_str and input_str in "qQ":
            sys.exit(0)
        if not input_str:
            print("No input was provided")
        else:
            producer.send_messages(
                'test',        # topic
                'topic-key',   # key
                "(time: {}, message: {})".format(get_time(), input_str),  # message
            )

def keyedProducerTest3():
    """Test KeyedProducer.

    Topic: single-replica case (JOB_TEST_1).
    Purpose: publish messages to the given broker with KeyedProducer and
    verify recovery after devops-dev1:9193 is shut down (wait about ten
    seconds; the producer reattaches automatically, no restart needed).
    """
    import pdb
    pdb.set_trace()
    kafkaClient = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(kafkaClient)
    message = "This is a test-"
    index = 0
    while True:
        try:
            tmpmsg = message + str(index)
            producer.send_messages(b'JOB_TEST_1', b'keys', tmpmsg.encode('utf-8'))
            index += 1
            time.sleep(1)
        except (FailedPayloadsError, KafkaUnavailableError) as msg:
            print('FailedPayloadsError/KafkaUnavailableError:', msg)
            time.sleep(10)

class KeyedProducer(BaseStreamProducer):

    def __init__(self, connection, topic_done, partitioner_cls):
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls

    def _connect_producer(self):
        if self._prod is None:
            try:
                self._prod = KafkaKeyedProducer(self._conn,
                                                partitioner=self._partitioner_cls,
                                                codec=CODEC_SNAPPY)
            except BrokerResponseError:
                self._prod = None
                logger.warning("Could not connect producer to Kafka server")
                return False
        return True

    def send(self, key, *messages):
        success = False
        max_tries = 5
        if self._connect_producer():
            n_tries = 0
            while not success and n_tries < max_tries:
                try:
                    self._prod.send_messages(self._topic_done, key, *messages)
                    success = True
                except MessageSizeTooLargeError as e:
                    logger.error(str(e))
                    break
                except BrokerResponseError:
                    n_tries += 1
                    logger.warning("Could not send message. Try {0}/{1}".format(
                        n_tries, max_tries))
                    sleep(1.0)
        return success

def test_round_robin_partitioner(self):
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
    resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
    resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
    resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))

    self.assert_produce_response(resp1, start_offset0 + 0)
    self.assert_produce_response(resp2, start_offset1 + 0)
    self.assert_produce_response(resp3, start_offset0 + 1)
    self.assert_produce_response(resp4, start_offset1 + 1)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one"), self.msg("three")])
    self.assert_fetch_offset(1, start_offset1, [self.msg("two"), self.msg("four")])

    producer.stop()

def test_switch_leader_keyed_producer(self):
    topic = self.topic

    producer = KeyedProducer(self.client, async=False)

    # Send 10 random messages
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)

    # kill leader for partition 0
    self._kill_leader(topic, 0)

    recovered = False
    started = time.time()
    timeout = 60
    while not recovered and (time.time() - started) < timeout:
        try:
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
            if producer.partitioners[topic].partition(key) == 0:
                recovered = True
        except (FailedPayloadsError, ConnectionError,
                RequestTimedOutError, NotLeaderForPartitionError):
            log.debug("caught exception sending message -- will retry")
            continue

    # Verify we successfully sent the message
    self.assertTrue(recovered)

    # send some more messages just to make sure no more exceptions
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)
