def __init__(self, config):
    """Set up the Kafka client and keyed producer described by ``config``.

    ``config`` is indexed with the keys ``'brokers'``, ``'topic'`` and
    ``'partitioner'``. A ``'partitioner'`` value of ``None`` selects
    ``RoundRobinPartitioner``.
    """
    self.brokers = config['brokers']
    self.topic = config['topic']
    self.kafka = KafkaClient(self.brokers)

    chosen = config['partitioner']
    if chosen is None:
        chosen = RoundRobinPartitioner
    self.producer = KeyedProducer(self.kafka, partitioner=chosen)
def main():
    """Parse the command line and sniff packets on ``args.interface``.

    Invalid arguments print the parser help and return. When ``args.local``
    is set, packets are handed to ``local_out`` and nothing else is set up.
    Otherwise a ZooKeeper client is started, the broker list derived from
    it is used to build a Kafka client, and packets are handed to
    ``produce_callback``.
    """
    global producer, packet_count, topic, debug

    parser = make_parser()
    args = parser.parse_args()
    if not valid_args(args):
        parser.print_help()
        return

    if args.local:
        sniff(iface=args.interface, store=0, prn=local_out)
        return

    zookeeper = KazooClient(args.zookeeper)
    zookeeper.start()
    kafka = KafkaClient(zk_broker_list(zookeeper))

    # The sniff callback receives the packet as its only argument, so the
    # remaining state it needs is published through module globals.
    producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)
    packet_count = 0
    topic = args.topic
    debug = args.debug

    sniff(iface=args.interface, store=0, prn=produce_callback)
def test_hashed_partitioner(self):
    """Keyed sends with ``HashedPartitioner`` land on ``partitions[hash(key) % 2]``."""
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    sends = [("1", "one"), ("2", "two"), ("3", "three"), ("3", "four"), ("4", "five")]
    resps = [
        producer.send_messages(self.topic, self.key(k), self.msg(m))
        for k, m in sends
    ]

    first, second = partitions[0], partitions[1]
    next_offset = {first: start_offsets[0], second: start_offsets[1]}
    expected = {first: [], second: []}
    keys = [self.key(k) for k, _ in sends]
    msgs = [self.msg(m) for _, m in sends]

    # Replay the sends locally to work out the offset each response must
    # report and which messages each partition must contain.
    for key, resp, msg in zip(keys, resps, msgs):
        partition = partitions[hash(key) % 2]
        self.assert_produce_response(resp, next_offset[partition])
        next_offset[partition] += 1
        expected[partition].append(msg)

    self.assert_fetch_offset(first, start_offsets[0], expected[first])
    self.assert_fetch_offset(second, start_offsets[1], expected[second])

    producer.stop()
def main():
    """Connect to the local Kafka broker and run ``oper`` with send/receive hooks.

    Incoming messages are consumed from the ``prioritizer`` topic. Each
    outgoing message is published to ``dx``; when a random draw from
    1..100 exceeds 70 it is also published to ``ui`` under the key
    ``fromPR``.
    """
    host = '127.0.0.1'
    prioritizer_topic = b'prioritizer'
    dx_topic = b'dx'
    ui_topic = b'ui'
    broker = host + ':9092'

    kafka = KafkaClient(broker)
    producer = SimpleProducer(kafka)
    uiProducer = KeyedProducer(kafka)
    consumer = KafkaConsumer(prioritizer_topic,
                             bootstrap_servers=[broker],
                             consumer_timeout_ms=20)

    def sendMsg(m):
        suspect = random.randint(1, 100) > 70
        if suspect:
            print("Segment #" + m + " is highly suspect. Immediately notify user.")
            uiProducer.send_messages(ui_topic, b'fromPR', m)
        print("Notify diagnostic engine of segment #" + m + "...")
        producer.send_messages(dx_topic, m)

    def recvMsg():
        # A consumer timeout just means nothing arrived in time.
        try:
            return consumer.next()
        except ConsumerTimeout:
            return None

    oper(sendMsg, recvMsg)
def test_hashed_partitioner(self):
    """Keyed sends with ``HashedPartitioner`` land on partition ``hash(key) % 2``."""
    initial = {}
    for part in (0, 1):
        initial[part] = self.current_offset(self.topic, part)

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    sends = [("1", "one"), ("2", "two"), ("3", "three"), ("3", "four"), ("4", "five")]
    resps = [
        producer.send(self.topic, self.key(k), self.msg(m))
        for k, m in sends
    ]

    next_offset = dict(initial)
    expected = {0: [], 1: []}
    keys = [self.key(k) for k, _ in sends]
    msgs = [self.msg(m) for _, m in sends]

    # Replay the sends locally to derive the expected offsets and the
    # expected per-partition message lists.
    for key, resp, msg in zip(keys, resps, msgs):
        part = hash(key) % 2
        self.assert_produce_response(resp, next_offset[part])
        next_offset[part] += 1
        expected[part].append(msg)

    for part in (0, 1):
        self.assert_fetch_offset(part, initial[part], expected[part])

    producer.stop()
def test_round_robin_partitioner(self):
    """Four keyed sends alternate between the first two partitions."""
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    sends = [("key1", "one"), ("key2", "two"), ("key3", "three"), ("key4", "four")]
    resps = [
        producer.send_messages(self.topic, self.key(k), self.msg(m))
        for k, m in sends
    ]

    first_start, second_start = start_offsets[0], start_offsets[1]
    expected_offsets = [first_start, second_start, first_start + 1, second_start + 1]
    for resp, offset in zip(resps, expected_offsets):
        self.assert_produce_response(resp, offset)

    self.assert_fetch_offset(partitions[0], first_start,
                             [self.msg("one"), self.msg("three")])
    self.assert_fetch_offset(partitions[1], second_start,
                             [self.msg("two"), self.msg("four")])

    producer.stop()
def test_hashed_partitioner(self):
    """Integer keys 1..4 sent with ``HashedPartitioner``: even keys are
    expected on partition 0, odd keys on partition 1."""
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    sends = [(1, "one"), (2, "two"), (3, "three"), (3, "four"), (4, "five")]
    resps = [producer.send(self.topic, key, self.msg(text)) for key, text in sends]

    # Expected offset of each response, in send order.
    expected_offsets = [
        start_offset1,
        start_offset0,
        start_offset1 + 1,
        start_offset1 + 2,
        start_offset0 + 1,
    ]
    for resp, offset in zip(resps, expected_offsets):
        self.assert_produce_response(resp, offset)

    self.assert_fetch_offset(0, start_offset0,
                             [self.msg("two"), self.msg("five")])
    self.assert_fetch_offset(1, start_offset1,
                             [self.msg("one"), self.msg("three"), self.msg("four")])

    producer.stop()
def test_keyedproducer_message_types(self):
    """``send_messages`` raises ``TypeError`` for non-bytes payloads and
    accepts bytes or ``None``."""
    client = MagicMock()
    client.get_partition_ids_for_topic.return_value = [0, 1]
    producer = KeyedProducer(client)

    topic = b"test-topic"
    key = b"testkey"

    rejected = (u'你怎么样?', 12, ['a', 'list'], ('a', 'tuple'), {'a': 'dict'})
    for payload in rejected:
        with self.assertRaises(TypeError):
            logging.debug("attempting to send message of type %s", type(payload))
            producer.send_messages(topic, key, payload)

    # These payloads must go through without raising.
    for payload in (b'a string!', None):
        producer.send_messages(topic, key, payload)
def test_keyedproducer_null_payload(self):
    """``None`` payloads are produced and fetched back like ordinary messages.

    Four keyed messages (two of them ``None``) are sent through a
    round-robin ``KeyedProducer``; each response is checked against the
    expected offset and both partitions are then fetched and compared.
    """
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    # Stop the producer even when an assertion below fails.
    try:
        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assert_produce_response(resp, start_offsets[0])

        resp = producer.send_messages(self.topic, self.key("key2"), None)
        self.assert_produce_response(resp, start_offsets[1])

        resp = producer.send_messages(self.topic, self.key("key3"), None)
        self.assert_produce_response(resp, start_offsets[0] + 1)

        resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
        self.assert_produce_response(resp, start_offsets[1] + 1)

        self.assert_fetch_offset(partitions[0], start_offsets[0],
                                 [self.msg("one"), None])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 [None, self.msg("four")])
    finally:
        producer.stop()
def waitBrokerRecover(kafkaClient):
    '''Try once to build a KeyedProducer on ``kafkaClient``.

    Returns the producer on success. On ``KafkaUnavailableError`` the call
    sleeps 10 seconds and returns ``None``; it does not retry by itself.
    '''
    try:
        producer = KeyedProducer(kafkaClient)
    except KafkaUnavailableError:
        time.sleep(10)
        return None
    return producer
def message_sender(m):
    """Send every (key, value) pair collected from ``m`` to the ``flask`` topic.

    ``m.collect()`` must yield indexable records; element 0 is stringified
    and UTF-8 encoded to form the message key, element 1 is sent unchanged
    as the payload. The client connects to ``localhost:9092`` and is closed
    before returning, whether or not sending succeeded.
    """
    client = SimpleClient('localhost:9092')
    try:
        producer = KeyedProducer(client)
        for record in m.collect():
            # ``bytes.encode`` only exists on Python 2 (where bytes is str);
            # encoding the str works on both major versions.
            key = str(record[0]).encode('utf-8')
            producer.send_messages('flask', key, record[1])
    finally:
        client.close()
def __init__(self, host, port, topic, key=None):
    """Create a logging handler backed by a Kafka producer.

    A ``SimpleProducer`` is built when ``key`` is ``None``; otherwise a
    ``KeyedProducer`` is built. Either one is constructed from the Kafka
    client and ``topic``.
    """
    logging.Handler.__init__(self)
    self.kafka_client = KafkaClient(host, port)
    self.key = key

    producer_cls = SimpleProducer if key is None else KeyedProducer
    self.producer = producer_cls(self.kafka_client, topic)
def producer(self):
    """Return the cached producer, creating it on first use.

    Creation is only attempted while no producer exists and no error has
    been recorded. If ``self.kafka`` is ``None`` at that point, the object
    is expected to be in an error state already.
    """
    needs_setup = self._producer is None and not self._has_error
    if needs_setup:
        if self.kafka is None:
            # if self.kafka is None then we should be in an error state
            assert self._has_error
        else:
            self._producer = KeyedProducer(self._kafka)
    return self._producer
def genData(topic):
    """Replay ``source_file`` into ``topic`` forever, one line per message.

    Each line is keyed by its first tab-separated field, echoed to stdout
    (key, then the right-stripped line) and sent right-stripped, followed
    by a 0.1 second pause.
    """
    producer = KeyedProducer(kafka)
    while True:
        for raw in fileinput.input(source_file):
            text = raw.rstrip()
            key = raw.split("\t")[0]
            print(key)
            print(text)
            producer.send(topic, key, text)
            time.sleep(0.1)  # Creating some delay between messages
        fileinput.close()
def genData(topic):
    """Replay ``source_file`` into ``topic`` forever, one line per message.

    Each line is keyed by its first space-separated field and sent with
    trailing whitespace stripped, followed by a 0.1 second pause. When the
    end of the file is reached it is reopened and replayed again.
    """
    producer = KeyedProducer(kafka)
    while True:
        # The with-block closes the handle after every pass; there is
        # nothing to close on ``source_file`` itself, which is the path
        # handed to open().
        with open(source_file) as f:
            for line in f:
                key = line.split(" ")[0]
                producer.send(topic, key, line.rstrip())
                # Creating some delay to allow proper rendering of the cab
                # locations on the map
                time.sleep(0.1)
def __init__(self, host, topic, **kwargs):
    """Create a logging handler that publishes to the Kafka topic ``topic``.

    A truthy ``key`` entry in ``kwargs`` selects ``KeyedProducer``;
    otherwise ``KafkaProducer`` is used. Both are constructed with
    ``bootstrap_servers=host``, ``api_version=(0, 10, 1)`` and all of
    ``kwargs``.
    """
    logging.Handler.__init__(self)
    self.key = kwargs.get("key", None)
    self.kafka_topic_name = topic

    # NOTE(review): ``kwargs`` still contains "key" (it is read, not
    # removed) when forwarded below -- confirm the producer accepts it.
    producer_cls = KeyedProducer if self.key else KafkaProducer
    self.producer = producer_cls(bootstrap_servers=host,
                                 api_version=(0, 10, 1),
                                 **kwargs)
def test_async_keyed_producer(self):
    """An async keyed send returns an empty response and the message can
    be fetched from partition 0."""
    start_offset0 = self.current_offset(self.topic, 0)

    # ``async`` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument is a SyntaxError there. Passing it through
    # dict unpacking hands the producer the same option on every version.
    producer = KeyedProducer(self.client,
                             partitioner=RoundRobinPartitioner,
                             **{'async': True})
    # Stop the producer even when an assertion below fails.
    try:
        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
    finally:
        producer.stop()
def __init__(self, warehouse, warehouse_result):
    """Create the Kafka client, a keyed producer and a consumer on ``warehouse``.

    Both the client and the consumer use the address returned by
    ``Conf.getWareHouseAddr()``. The consumer joins group ``cnlab`` with
    auto-commit enabled at a 30 second interval and offset reset set to
    ``smallest``.
    """
    self.warehouse = warehouse
    self.warehouse_result = warehouse_result

    self.kafka = KafkaClient(Conf.getWareHouseAddr())
    self.producer = KeyedProducer(self.kafka)

    consumer_options = dict(
        bootstrap_servers=[Conf.getWareHouseAddr()],
        group_id="cnlab",
        auto_commit_enable=True,
        auto_commit_interval_ms=30 * 1000,
        auto_offset_reset='smallest')
    self.consumer = KafkaConsumer(self.warehouse, **consumer_options)
def _connect_producer(self): """If producer is not connected try to connect it now. :returns: bool -- True if producer is connected """ if self._prod is None: try: self._prod = KeyedProducer(self._conn, partitioner=FingerprintPartitioner, codec=CODEC_SNAPPY) except BrokerResponseError: self._prod = None if self._manager is not None: self._manager.logger.backend.warning( "Could not connect producer to Kafka server") return False return True
def test_async_keyed_producer(self):
    """An async keyed send returns an empty response and the message
    becomes fetchable from the topic's first partition."""
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    # ``async`` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument is a SyntaxError there. Passing it through
    # dict unpacking hands the producer the same option on every version.
    producer = KeyedProducer(self.client,
                             partitioner=RoundRobinPartitioner,
                             **{'async': True})
    # Stop the producer even when an assertion below fails.
    try:
        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
            time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])
    finally:
        producer.stop()
def keyedProducerTest2():
    '''test KeyedProducer
    @topic: single-replica case (JOB_TEST_1)
    @function: exercise KeyedProducer by publishing messages to the given
               broker, and check the error raised after develops-dev1:9193
               is shut down
    '''
    import pdb
    pdb.set_trace()

    client = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(client)

    prefix = "This is a test-"
    sent = 0
    # Publish one numbered message per second until an error propagates.
    while True:
        producer.send_messages(b'JOB_TEST_1', 'keys', prefix + str(sent))
        sent += 1
        time.sleep(1)
def kafkaTasks(self, addr, topic, tasks):
    """Publish every task in ``tasks`` to ``topic`` through the Kafka client at ``addr``.

    Each task is serialized with ``cPickle.dumps`` and sent keyed by
    ``self.manager.name``. The client is closed before returning, whether
    or not sending succeeded.

    :raises Exception: when kafka-python cannot be imported.
    """
    try:
        from kafka import SimpleProducer, KafkaClient, KeyedProducer
    except ImportError:
        # Only a failed import means the package is missing; anything else
        # (e.g. KeyboardInterrupt) must not be reported as "not installed".
        logger.error("kafka-python is not installed")
        raise Exception("kafka-python is not installed")

    kafka_client = None
    try:
        kafka_client = KafkaClient(addr)
        producer = KeyedProducer(kafka_client)
        for task in tasks:
            producer.send_messages(topic, self.manager.name, cPickle.dumps(task))
    finally:
        if kafka_client is not None:
            kafka_client.close()
def keyedProducerTest3():
    '''test KeyedProducer
    @topic: single-replica case (JOB_TEST_1)
    @function: exercise KeyedProducer by publishing messages to the given
               broker, and check how it recovers from the error after
               develops-dev1:9193 is shut down
               (wait 10 seconds; no restart needed, it reconnects on its own)
    '''
    import pdb
    pdb.set_trace()

    client = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(client)

    prefix = "This is a test-"
    sent = 0
    while True:
        try:
            producer.send_messages(b'JOB_TEST_1', 'keys', prefix + str(sent))
            # The counter only advances after a successful send, so a
            # failed message number is retried on the next iteration.
            sent += 1
            time.sleep(1)
        except (FailedPayloadsError, KafkaUnavailableError) as msg:
            print('Occur FailedPayloadsError error, msg:', msg)
            time.sleep(10)
def main():
    """Connect to the local Kafka broker and run ``oper`` with send/receive hooks.

    Incoming messages are consumed from the ``dx`` topic; outgoing
    messages are published to ``ui`` under the key ``fromDX``.
    """
    host = '127.0.0.1'
    dx_topic = b'dx'
    ui_topic = b'ui'
    broker = host + ':9092'

    client = KafkaClient(broker)
    producer = KeyedProducer(client)
    consumer = KafkaConsumer(dx_topic,
                             bootstrap_servers=[broker],
                             consumer_timeout_ms=20)

    def sendMsg(m):
        producer.send_messages(ui_topic, b'fromDX', m)

    def recvMsg():
        # A consumer timeout just means nothing arrived in time.
        try:
            return consumer.next()
        except ConsumerTimeout:
            return None

    oper(sendMsg, recvMsg)
def send_messages_to_kafka(self, topic, num_msgs=-1, msg_len=70, msg_interval=0.1, is_multi=True, write=True):
    """
    Send dummy messages to Kafka.

    :param topic: Name of Kafka topic
    :param num_msgs: Number of messages to send. Sending stops once the sent
        count equals ``int(num_msgs)``, so the default ``-1`` never stops
    :param msg_len: Length of the message; the text is padded with ``#``
        and cut to this many characters
    :param msg_interval: Seconds to sleep after each message
    :param is_multi: True if multi-partitioned kafka (KeyedProducer with
        round-robin partitioning); False if single partition (SimpleProducer)
    :param write: When true, every message is also written as a line to
        ``/tmp/kafka-input-module-msgs.log`` (the file is truncated first)
    :return: None
    """
    # Single-argument print() produces the same output under both the
    # Python 2 print statement and the Python 3 print function.
    print("Topic: %s #msgs= %s len= %s int= %s mult= %s w= %s"
          % (topic, num_msgs, msg_len, msg_interval, is_multi, write))

    out = open("/tmp/kafka-input-module-msgs.log", "w") if write else None
    # Close the log file on every exit path, including errors raised by
    # the Kafka calls below.
    try:
        kafka_client = KafkaClient(self.broker)
        if is_multi:
            producer = KeyedProducer(kafka_client, partitioner=RoundRobinPartitioner)
            producer_type = 'KeyedProducer'
        else:
            producer = SimpleProducer(kafka_client)
            producer_type = 'SimpleProducer'

        count = 0
        num_msgs = int(num_msgs)
        while count != num_msgs:
            count += 1
            msg = (str(count) + ": Message from " + producer_type + " : "
                   + str(datetime.now()) + " " + "#" * msg_len)
            msg = msg[:msg_len]
            if is_multi:
                producer.send_messages(topic, "key" + str(count), msg)
            else:
                producer.send_messages(topic, msg)
            if out is not None:
                out.write(msg + "\n")
            time.sleep(msg_interval)
    finally:
        if out is not None:
            out.close()

    print("Sent %s messages!!" % count)
def test_switch_leader_keyed_producer(self):
    """A synchronous keyed producer keeps working after the leader of
    partition 0 is killed.

    Ten random messages are sent, the leader is killed, and sending is
    retried for up to 60 seconds until a message whose key maps to
    partition 0 goes through. Ten more messages are then sent to make sure
    no further exception is raised.
    """
    topic = self.topic
    producer = KeyedProducer(self.client, async_send=False)

    def send_random():
        # Send one random key/message pair and report the key used.
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)
        return key

    # Send 10 random messages
    for _ in range(10):
        send_random()

    # kill leader for partition 0
    self._kill_leader(topic, 0)

    timeout = 60
    recovered = False
    started = time.time()
    while not recovered and (time.time() - started) < timeout:
        try:
            key = send_random()
            recovered = producer.partitioners[topic].partition(key) == 0
        except (FailedPayloadsError, KafkaConnectionError,
                RequestTimedOutError, NotLeaderForPartitionError):
            log.debug("caught exception sending message -- will retry")

    # Verify we successfully sent the message
    self.assertTrue(recovered)

    # send some more messages just to make sure no more exceptions
    for _ in range(10):
        send_random()
def __init__(self, settings, no_batches, no_scoring, no_incoming):
    """Wire up Kafka I/O, the frontier manager and the scheduling slot.

    Builds the Kafka client and a snappy-compressed keyed producer, a
    consumer for the incoming topic, a consumer for the scoring topic
    (skipped when ``no_scoring`` is true), an offset fetcher for the
    outgoing topic, the frontier manager with its encoder/decoder, and
    the ``Slot`` that is given the three worker callbacks.
    """
    self._kafka = KafkaClient(settings.get('KAFKA_LOCATION'))
    self._producer = KeyedProducer(self._kafka,
                                   partitioner=Crc32NamePartitioner,
                                   codec=CODEC_SNAPPY)

    frontier_group = settings.get('FRONTIER_GROUP')
    self._in_consumer = SimpleConsumer(self._kafka,
                                       frontier_group,
                                       settings.get('INCOMING_TOPIC'),
                                       buffer_size=1048576,
                                       max_buffer_size=10485760)
    if not no_scoring:
        self._scoring_consumer = SimpleConsumer(self._kafka,
                                                frontier_group,
                                                settings.get('SCORING_TOPIC'),
                                                buffer_size=262144,
                                                max_buffer_size=1048576)
    self._offset_fetcher = Fetcher(self._kafka,
                                   settings.get('OUTGOING_TOPIC'),
                                   frontier_group)

    manager = FrontierManager.from_settings(settings)
    self._manager = manager
    self._backend = manager.backend
    self._encoder = Encoder(manager.request_model)
    self._decoder = Decoder(manager.request_model, manager.response_model)

    self.consumer_batch_size = settings.get('CONSUMER_BATCH_SIZE', 128)
    self.outgoing_topic = settings.get('OUTGOING_TOPIC')
    self.max_next_requests = settings.MAX_NEXT_REQUESTS

    new_batch_delay = settings.get('NEW_BATCH_DELAY', 60.0)
    self.slot = Slot(self.new_batch, self.consume_incoming, self.consume_scoring,
                     no_batches, no_scoring, new_batch_delay, no_incoming)
    self.job_id = 0
    self.stats = {}
def send_to_kafka(self, kafka_client, is_multi_partitioner, noOfMsgs, topic, msg_interval=1):
    """
    Send dummy, timestamped messages to a Kafka server.

    :param kafka_client: value passed to ``KafkaClient`` to open the connection
    :param is_multi_partitioner: exactly ``True`` selects a round-robin
        KeyedProducer (multi-partitioned kafka); anything else selects a
        SimpleProducer (single partition)
    :param noOfMsgs: Number of messages to send; ``-1`` sends forever,
        sleeping ``msg_interval`` after each message
    :param topic: Name of Kafka topic
    :param msg_interval: Seconds to sleep between messages in the endless mode
    :return: None
    """
    kafka = KafkaClient(kafka_client)

    keyed = is_multi_partitioner is True
    if keyed:
        self.producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)
        prefix = "Message sent from Keyed Producer : "
    else:
        self.producer = SimpleProducer(kafka)
        prefix = "Message sent from Simple Producer : "

    def publish(key):
        # The key is only meaningful for the keyed producer.
        body = prefix + str(datetime.now().time())
        if keyed:
            self.producer.send_messages(topic, key, body)
        else:
            self.producer.send_messages(topic, body)

    if noOfMsgs == -1:
        seq = 1
        while True:
            publish("key" + str(seq))
            seq += 1
            time.sleep(msg_interval)
    else:
        for i in range(0, noOfMsgs):
            publish("k" + str(i))
def test_round_robin_partitioner(self):
    """Four keyed sends alternate between partitions 0 and 1."""
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    sends = [("key1", "one"), ("key2", "two"), ("key3", "three"), ("key4", "four")]
    resps = [
        producer.send(self.topic, self.key(k), self.msg(m))
        for k, m in sends
    ]

    # Expected offset of each response, in send order.
    expected_offsets = [start_offset0, start_offset1,
                        start_offset0 + 1, start_offset1 + 1]
    for resp, offset in zip(resps, expected_offsets):
        self.assert_produce_response(resp, offset)

    self.assert_fetch_offset(0, start_offset0,
                             [self.msg("one"), self.msg("three")])
    self.assert_fetch_offset(1, start_offset1,
                             [self.msg("two"), self.msg("four")])

    producer.stop()
# Connection settings are read from the environment.
kafkaIPandPort = os.environ.get("kafkaIPandPort")
intrinio_forex_key = os.environ.get("intrinio_forex_key")


def on_quote(quote, backlog):
    """Forward one quote to the Kafka topic, keyed by its ``code`` field.

    The quote is serialized to JSON and UTF-8 encoded before sending.
    """
    print("QUOTE: ", quote, "BACKLOG LENGTH: ", backlog)
    payload = json.dumps(quote).encode('utf-8')
    # Keyed messages are sent as (topic, key, message), e.g.
    # producer.send_messages(b'my-topic', b'key1', b'some message')
    prod.send_messages(topic, quote["code"].encode("utf-8"), payload)


# kafka connection
cluster = kafka.KafkaClient(kafkaIPandPort)
prod = KeyedProducer(cluster)
topic = "forex_topic"

# intrinio connection
options = {
    'api_key': intrinio_forex_key,
    'provider': 'fxcm',
    'on_quote': on_quote
}

client = IntrinioRealtimeClient(options)
client.join([
    'fxcm:pair:EUR/USD',
    'fxcm:pair:USD/JPY',
    'fxcm:pair:GBP/USD',
    'fxcm:pair:USD/CHF',
    'fxcm:pair:EUR/CHF',
    'fxcm:pair:AUD/USD',
    'fxcm:pair:USD/CAD',
    'fxcm:pair:NZD/USD',
    'fxcm:pair:EUR/GBP',
    'fxcm:pair:EUR/JPY',
    'fxcm:pair:GBP/JPY',
    'fxcm:pair:CHF/JPY',
    'fxcm:pair:GBP/CHF',
    'fxcm:pair:EUR/JPY',
    'fxcm:pair:EUR/AUD',
    'fxcm:pair:EUR/CAD',
    'fxcm:pair:AUD/CAD',
    'fxcm:pair:AUD/JPY',
    'fxcm:pair:CAD/JPY',
    'fxcm:pair:NZD/JPY',
    'fxcm:pair:GBP/CAD',
    'fxcm:pair:GBP/NZD',
    'fxcm:pair:GBP/AUD',
    'fxcm:pair:AUD/NZD',
    'fxcm:pair:USD/SEK',
    'fxcm:pair:EUR/SEK',
    'fxcm:pair:EUR/NOK',
    'fxcm:pair:USD/NOK',
    'fxcm:pair:USD/MXN',
    'fxcm:pair:AUD/CHF',
    'fxcm:pair:EUR/NZD',
    'fxcm:pair:USD/ZAR',
    'fxcm:pair:ZAR/JPY',
    'fxcm:pair:USD/TRY',
    'fxcm:pair:EUR/TRY',
    'fxcm:pair:NZD/CHF',
    'fxcm:pair:CAD/CHF',
    'fxcm:pair:NZD/CAD',
    'fxcm:pair:TRY/JPY',
])
client.connect()
client.keep_alive()