def test_send_broker_unaware_request_fail(self):
    'Tests that call fails when all hosts are unavailable'
    mocked_conns = {
        ('kafka01', 9092): MagicMock(),
        ('kafka02', 9092): MagicMock()
    }

    # inject KafkaConnection side effects
    mocked_conns[('kafka01', 9092)].send.side_effect = RuntimeError("kafka01 went away (unittest)")
    mocked_conns[('kafka02', 9092)].send.side_effect = RuntimeError("Kafka02 went away (unittest)")

    def mock_get_conn(host, port):
        return mocked_conns[(host, port)]

    # patch to avoid making requests before we want it
    with patch.object(KafkaClient, 'load_metadata_for_topics'):
        with patch.object(KafkaClient, '_get_conn', side_effect=mock_get_conn):
            client = KafkaClient(hosts=['kafka01:9092', 'kafka02:9092'])

            req = KafkaProtocol.encode_metadata_request(b'client', 0)
            with self.assertRaises(KafkaUnavailableError):
                client._send_broker_unaware_request(
                    payloads=['fake request'],
                    encoder_fn=MagicMock(return_value='fake encoded message'),
                    decoder_fn=lambda x: x)

            for key, conn in six.iteritems(mocked_conns):
                conn.send.assert_called_with(ANY, 'fake encoded message')
def test_send_broker_unaware_request(self):
    'Tests that call works when at least one of the hosts is available'
    mocked_conns = {
        ('kafka01', 9092): MagicMock(),
        ('kafka02', 9092): MagicMock(),
        ('kafka03', 9092): MagicMock()
    }

    # inject KafkaConnection side effects
    mocked_conns[('kafka01', 9092)].send.side_effect = RuntimeError("kafka01 went away (unittest)")
    mocked_conns[('kafka02', 9092)].recv.return_value = 'valid response'
    mocked_conns[('kafka03', 9092)].send.side_effect = RuntimeError("kafka03 went away (unittest)")

    def mock_get_conn(host, port):
        return mocked_conns[(host, port)]

    # patch to avoid making requests before we want it
    with patch.object(KafkaClient, 'load_metadata_for_topics'), \
            patch.object(KafkaClient, '_get_conn', side_effect=mock_get_conn):
        client = KafkaClient(hosts='kafka01:9092,kafka02:9092')

        resp = client._send_broker_unaware_request(1, 'fake request')

        self.assertEqual('valid response', resp)
        mocked_conns[('kafka02', 9092)].recv.assert_called_with(1)
def test_send_produce_request_raises_when_noleader(self, protocol, conn):
    "Send producer request raises LeaderNotAvailableError if leader is not available"
    conn.recv.return_value = 'response'  # anything but None

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678)
    ]

    topics = [
        TopicMetadata('topic_noleader', NO_ERROR, [
            PartitionMetadata('topic_noleader', 0, -1, [], [], NO_LEADER),
            PartitionMetadata('topic_noleader', 1, -1, [], [], NO_LEADER),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

    client = KafkaClient(hosts=['broker_1:4567'])

    requests = [ProduceRequest(
        "topic_noleader", 0,
        [create_message("a"), create_message("b")])]

    with self.assertRaises(LeaderNotAvailableError):
        client.send_produce_request(requests)
def test_has_metadata_for_topic(self, protocol, conn):
    conn.recv.return_value = 'response'  # anything but None

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678)
    ]

    topics = [
        TopicMetadata(b'topic_still_creating', NO_LEADER, []),
        TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
        TopicMetadata(b'topic_noleaders', NO_ERROR, [
            PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
            PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

    client = KafkaClient(hosts=['broker_1:4567'])

    # Topics with no partitions return False
    self.assertFalse(client.has_metadata_for_topic('topic_still_creating'))
    self.assertFalse(client.has_metadata_for_topic('topic_doesnt_exist'))

    # A topic with partition metadata, but no leaders, returns True
    self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))
def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
    "Get leader for partitions reloads metadata if it is not available"
    conn.recv.return_value = 'response'  # anything but None

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678)
    ]

    topics = [
        TopicMetadata('topic_no_partitions', NO_LEADER, [])
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

    client = KafkaClient(hosts=['broker_1:4567'])

    # topic metadata is loaded but empty
    self.assertDictEqual({}, client.topics_to_brokers)

    topics = [
        TopicMetadata('topic_one_partition', NO_ERROR, [
            PartitionMetadata('topic_one_partition', 0, 0, [0, 1], [0, 1], NO_ERROR)
        ])
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

    # calling _get_leader_for_partition (from any broker aware request)
    # will try loading metadata again for the same topic
    leader = client._get_leader_for_partition('topic_one_partition', 0)

    self.assertEqual(brokers[0], leader)
    self.assertDictEqual(
        {TopicAndPartition('topic_one_partition', 0): brokers[0]},
        client.topics_to_brokers)
def test_get_leader_returns_none_when_noleader(self, protocol, conn):
    "Getting leader for partitions returns None when the partition has no leader"
    conn.recv.return_value = 'response'  # anything but None

    brokers = {}
    brokers[0] = BrokerMetadata(0, 'broker_1', 4567)
    brokers[1] = BrokerMetadata(1, 'broker_2', 5678)

    topics = {}
    topics['topic_noleader'] = {
        0: PartitionMetadata('topic_noleader', 0, -1, [], []),
        1: PartitionMetadata('topic_noleader', 1, -1, [], [])
    }
    protocol.decode_metadata_response.return_value = (brokers, topics)

    client = KafkaClient(hosts=['broker_1:4567'])
    self.assertDictEqual(
        {
            TopicAndPartition('topic_noleader', 0): None,
            TopicAndPartition('topic_noleader', 1): None
        },
        client.topics_to_brokers)
    self.assertIsNone(client._get_leader_for_partition('topic_noleader', 0))
    self.assertIsNone(client._get_leader_for_partition('topic_noleader', 1))

    topics['topic_noleader'] = {
        0: PartitionMetadata('topic_noleader', 0, 0, [0, 1], [0, 1]),
        1: PartitionMetadata('topic_noleader', 1, 1, [1, 0], [1, 0])
    }
    protocol.decode_metadata_response.return_value = (brokers, topics)
    self.assertEqual(brokers[0], client._get_leader_for_partition('topic_noleader', 0))
    self.assertEqual(brokers[1], client._get_leader_for_partition('topic_noleader', 1))
def test_send_broker_unaware_request_fail(self):
    'Tests that call fails when all hosts are unavailable'
    mocked_conns = {
        ('kafka01', 9092): MagicMock(),
        ('kafka02', 9092): MagicMock()
    }

    # inject KafkaConnection side effects
    mocked_conns[('kafka01', 9092)].send.side_effect = RuntimeError("kafka01 went away (unittest)")
    mocked_conns[('kafka02', 9092)].send.side_effect = RuntimeError("Kafka02 went away (unittest)")

    def mock_get_conn(host, port):
        return mocked_conns[(host, port)]

    # patch to avoid making requests before we want it
    with patch.object(KafkaClient, 'load_metadata_for_topics'):
        with patch.object(KafkaClient, '_get_conn', side_effect=mock_get_conn):
            client = KafkaClient(hosts=['kafka01:9092', 'kafka02:9092'])

            with self.assertRaises(KafkaUnavailableError):
                client._send_broker_unaware_request(1, 'fake request')

            for key, conn in compat.dict_items(mocked_conns):
                conn.send.assert_called_with(1, 'fake request')
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client,
            batch_send=True,
            batch_send_every_n=500,
            batch_send_every_t=30
        )

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None, at_least=False):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])

    client = KafkaClient(hosts)
    consumer = SimpleConsumer(client, None, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = consumer.pending(partitions)

    # Keep checking if it isn't immediately correct, subject to timeout
    while pending < check_count and (time.time() - started_at < timeout):
        pending = consumer.pending(partitions)
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    elif pending > check_count and not at_least:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))
    return True
def _get_offsets_from_kafka(brokers, topic, offset_time):
    """get dict representing kafka offsets."""
    # get client
    client = KafkaClient(brokers)

    # get partitions for a topic
    partitions = client.topic_partitions[topic]

    # https://cwiki.apache.org/confluence/display/KAFKA/
    # A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetRequest
    MAX_OFFSETS = 1
    offset_requests = [OffsetRequestPayload(topic, part_name, offset_time,
                                            MAX_OFFSETS)
                       for part_name in partitions.keys()]

    offsets_responses = client.send_offset_request(offset_requests)

    offset_dict = {}
    for response in offsets_responses:
        key = "_".join((response.topic, str(response.partition)))
        offset_dict[key] = response

    return offset_dict
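# A hedged usage sketch of the helper above, not from the original source: the
# broker string and topic name are illustrative. In the old Kafka offset API,
# offset_time -1 requests the latest offset and -2 the earliest, so their
# difference approximates the per-partition backlog.
latest = _get_offsets_from_kafka("kafka01:9092", "example_topic", -1)
earliest = _get_offsets_from_kafka("kafka01:9092", "example_topic", -2)
for key in latest:
    # key is "topic_partition"; each value is an offset response payload
    print(key, latest[key].offsets[0] - earliest[key].offsets[0])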
def run(self, topic, message, hosts=None):
    """
    Simple round-robin synchronous producer to send one message to one topic.

    :param topic: Kafka Topic to publish the message on.
    :type topic: ``str``

    :param message: The message to publish.
    :type message: ``str``

    :param hosts: Kafka hostname(s) to connect in host:port format.
                  Comma-separated for several hosts.
    :type hosts: ``str``

    :returns: Response data: `topic`, target `partition` where message was sent,
              `offset` number and `error` code (hopefully 0).
    :rtype: ``dict``
    """
    if hosts:
        _hosts = hosts
    elif self.config.get('hosts', None):
        _hosts = self.config['hosts']
    else:
        raise ValueError("Need to define 'hosts' in either action or in config")

    # set default for empty value
    _client_id = self.config.get('client_id') or self.DEFAULT_CLIENT_ID

    client = KafkaClient(_hosts, client_id=_client_id)
    client.ensure_topic_exists(topic)
    producer = SimpleProducer(client)
    result = producer.send_messages(topic, kafka_bytestring(message))

    if result[0]:
        return result[0].__dict__
def test_correlation_rollover(self):
    with patch.object(KafkaClient, 'load_metadata_for_topics'):
        big_num = 2**31 - 3
        client = KafkaClient(hosts=[], correlation_id=big_num)
        self.assertEqual(big_num + 1, client._next_id())
        self.assertEqual(big_num + 2, client._next_id())
        self.assertEqual(0, client._next_id())
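# Hedged sketch of the counter behavior the rollover test above pins down (not
# necessarily the library's exact implementation): correlation ids advance
# modulo 2**31, so the id after 2**31 - 1 wraps back to 0.
def _next_correlation_id(correlation_id):
    return (correlation_id + 1) % 2**31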
def send_kafka_msg(iters):
    # TODO: Add try/catch statements for kafka connection
    kafka = KafkaClient(kafka_host)
    producer = SimpleProducer(kafka)
    for key, val in iters:
        msg = combine_count_json(key, val)
        producer.send_messages(str(topic).encode("utf-8"), str(msg).encode("utf-8"))
    kafka.close()
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def consume(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', cfg['kafka']['pages'])
    producer = SimpleProducer(kafka)
    consumer.max_buffer_size = 20 * 1024 * 1024
    for msg in consumer:
        page = json.loads(msg.message.value)
        process(page, producer)
    kafka.close()
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
class KafkaSender():
    def __init__(self):
        self.client = KafkaClient(hosts)
        # self.producer = SimpleProducer(self.client, batch_send=batch_send, batch_send_every_n=batch_send_every_n)
        self.producer = KafkaProducer(bootstrap_servers=hosts)
        self.client.ensure_topic_exists(topic)

    def send_messages(self, msg):
        self.producer.send(topic, msg)
def checker():
    client = KafkaClient(kafka2)
    topic = "twittercontent"
    partitions = client.topic_partitions[topic]

    # NOTE: OffsetRequestPayload(topic, partition, time, max_offsets) pairs with
    # send_offset_request (broker offsets); send_offset_fetch_request expects
    # OffsetFetchRequestPayload instead, so the original call was mismatched.
    offset_requests = [OffsetRequestPayload(topic, p, -1, 1)
                       for p in partitions.keys()]
    offsets_responses = client.send_offset_request(offset_requests)

    for r in offsets_responses:
        print "partition = %s, offset = %s" % (r.partition, r.offsets[0])
def test_ensure_topic_exists(self, decode_metadata_response, conn):
    conn.recv.return_value = 'response'  # anything but None

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678)
    ]

    topics = [
        TopicMetadata(b'topic_still_creating', NO_LEADER, []),
        TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
        TopicMetadata(b'topic_noleaders', NO_ERROR, [
            PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
            PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
        ]),
    ]
    decode_metadata_response.return_value = MetadataResponse(brokers, topics)

    client = KafkaClient(hosts=['broker_1:4567'])

    with self.assertRaises(UnknownTopicOrPartitionError):
        client.ensure_topic_exists('topic_doesnt_exist', timeout=1)

    with self.assertRaises(KafkaTimeoutError):
        client.ensure_topic_exists('topic_still_creating', timeout=1)

    # This should not raise
    client.ensure_topic_exists('topic_noleaders', timeout=1)
    client.ensure_topic_exists(b'topic_noleaders', timeout=1)
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    bytes_topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic
            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([
                OffsetRequest(kafka_bytestring(topic), partition, -1, 1)
            ])
        except:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))
        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
class Producer(object):
    def __init__(self, broker_ip_port):
        self.kafka = KafkaClient(broker_ip_port)
        self.producer = SimpleProducer(self.kafka)

    def send_message(self):
        response = self.producer.send_messages("HEY", "Hello World",
                                               "Kafka Deployment Worked!")
        return [("Error ", response[0].error), response]

    def close(self):
        self.kafka.close()
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can consume
    messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

    yield consumer

    kafka.close()
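# Hedged usage sketch (not from the original source): since the function above
# is a generator that yields a consumer and closes the client afterwards, it is
# presumably meant to be wrapped with contextlib.contextmanager; "some_topic"
# is illustrative.
from contextlib import contextmanager

capture_new_messages = contextmanager(setup_capture_new_messages_consumer)
with capture_new_messages('some_topic') as consumer:
    for message in consumer.get_messages(count=10, timeout=5):
        print(message.message.value)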
class Producer(KeyedProducer):
    """Keyed producer bound to its own KafkaClient."""
    def __init__(self, hosts, client_id, timeout):
        self._client = KafkaClient(hosts, client_id=client_id, timeout=timeout)
        super(Producer, self).__init__(self._client)

    def close(self):
        try:
            self._client.close()
        except:
            pass
def post_data():
    from kafka import KafkaClient
    from kafka import SimpleProducer
    kafka = KafkaClient(app.config['KAFKA_SERVER'])
    producer = SimpleProducer(kafka)
    if not request.json:
        resp = 'null post data'
    else:
        resp = producer.send_messages(app.config['KAFKA_TOPIC'], str(request.json))
        if resp:
            resp = {'error': resp[0].error, 'offset': resp[0].offset}
    kafka.close()
    return jsonify(resp)
class KafkaBase(Base):
    """ A block defining common Kafka functionality.

    Properties:
        host (str): location of the database
        port (int): open port served by database
        topic (str): topic name
    """
    host = StringProperty(title='Host', default='[[KAFKA_HOST]]')
    port = IntProperty(title='Port', default=9092)
    topic = StringProperty(title='Topic', default="", allow_none=False)

    def __init__(self):
        super().__init__()
        self._kafka = None
        self._encoded_topic = None

    def configure(self, context):
        super().configure(context)
        if not len(self.topic()):
            raise ValueError("Topic cannot be empty")
        self._connect()

    def stop(self):
        self._disconnect()
        super().stop()

    def _connect(self):
        self._kafka = KafkaClient("{0}:{1}".format(self.host(), self.port()))
        self._encoded_topic = self.topic()
        # ensuring topic is valid
        try:
            self._kafka.ensure_topic_exists(self._encoded_topic)
        except Exception:
            self.logger.exception("Topic: {0} does not exist".format(self.topic()))
            raise

    def _disconnect(self):
        if self._kafka:
            self._kafka.close()
            self._kafka = None

    @property
    def connected(self):
        return self._kafka
def main():
    kafka = KafkaClient("localhost:9092")
    producer = SimpleProducer(kafka)

    topic = b'test'
    msg = b'Hello World from Me!'

    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))

    kafka.close()
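# Hedged alternative to the sleep-and-retry above (a sketch, not the original
# author's code): the old kafka-python client exposes ensure_topic_exists,
# used elsewhere in this collection, which blocks until topic metadata is
# available instead of catching LeaderNotAvailableError after the fact.
def main_with_ensure():
    kafka = KafkaClient("localhost:9092")
    kafka.ensure_topic_exists('test')  # waits for the topic to be created
    producer = SimpleProducer(kafka)
    print_response(producer.send_messages(b'test', b'Hello World from Me!'))
    kafka.close()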
def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):
    conn.recv.return_value = "response"  # anything but None

    brokers = [BrokerMetadata(0, "broker_1", 4567),
               BrokerMetadata(1, "broker_2", 5678)]

    topics = [TopicMetadata("topic_doesnt_exist", UNKNOWN_TOPIC_OR_PARTITION, [])]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

    client = KafkaClient(hosts=["broker_1:4567"])

    requests = [ProduceRequest("topic_doesnt_exist", 0,
                               [create_message("a"), create_message("b")])]

    with self.assertRaises(UnknownTopicOrPartitionError):
        client.send_produce_request(requests)
class Consumer(object):
    def __init__(self, topic):
        self.kafka = KafkaClient("localhost:9092")
        self.consumer = SimpleConsumer(self.kafka, "1", topic)

    @classmethod
    def make_queue(cls):
        return Queue(4096)

    def run(self, q):
        try:
            for i in self.consumer:
                q.put(json.loads(i.message.value), True)
        except Exception as e:
            self.kafka.close()
def _run(self):
    pcount = 0
    pause = False
    while True:
        try:
            if pause:
                gevent.sleep(2)
                pause = False
            self._logger.error("New KafkaClient %s" % self._topic)
            self._kfk = KafkaClient(self._brokers, "kc-" + self._topic, timeout=5)
            self._failed = False
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096*4*4,
                                          max_buffer_size=4096*32*4)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("Error: %s trace %s" %
                                   (messag, traceback.format_exc()))
                self._failed = True
                raise RuntimeError(messag)

            self._logger.error("Starting %s" % self._topic)

            # Find the offset of the last message that has been queued
            consumer.seek(-1, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %s is %s" %
                              (self._topic, str(mi)))

            # start reading from last previously processed message
            if mi != None:
                consumer.seek(-1, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mlist = consumer.get_messages(10, timeout=0.5)
                    if not self.msg_handler(mlist):
                        raise gevent.GreenletExit
                    pcount += len(mlist)
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" %
                                       (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" %
                               (messag, traceback.format_exc()))
            self.stop_partition()
            self._failed = True
            pause = True
            if hasattr(ex, 'errno'):
                # This is an unrecoverable error
                if ex.errno == errno.EMFILE:
                    raise SystemExit(1)

    self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
    partdb = self.stop_partition()
    return self._partoffset, partdb
class KafkaBaseMonitor(BaseMonitor):
    '''
    Base monitor for handling outbound Kafka results
    '''

    def setup(self, settings):
        '''
        Setup the handler

        @param settings: The loaded settings file
        '''
        @MethodTimer.timeout(settings['KAFKA_CONN_TIMEOUT'], False)
        def _hidden_setup():
            try:
                # set up kafka
                self.kafka_conn = KafkaClient(settings['KAFKA_HOSTS'])
                self.producer = SimpleProducer(self.kafka_conn)
                self.topic_prefix = settings['KAFKA_TOPIC_PREFIX']
            except KafkaUnavailableError as ex:
                message = "An exception '{0}' occurred while setting up kafka. " \
                          "Arguments:\n{1!r}".format(type(ex).__name__, ex.args)
                self.logger.error(message)
                return False
            return True

        ret_val = _hidden_setup()
        self.use_appid_topics = settings['KAFKA_APPID_TOPICS']

        if ret_val:
            self.logger.debug("Successfully connected to Kafka in {name}".format(
                name=self.__class__.__name__))
        else:
            self.logger.error("Failed to set up Kafka Connection in {name} "
                              "within timeout".format(name=self.__class__.__name__))
            # this is essential to running the redis monitor
            sys.exit(1)

    def _send_to_kafka(self, master):
        '''
        Sends the message back to Kafka

        @param master: the final dict to send
        @returns: True if successfully sent to kafka
        '''
        appid_topic = "{prefix}.outbound_{appid}".format(
            prefix=self.topic_prefix, appid=master['appid'])
        firehose_topic = "{prefix}.outbound_firehose".format(
            prefix=self.topic_prefix)
        try:
            self.kafka_conn.ensure_topic_exists(firehose_topic)
            # don't want logger in outbound kafka message
            dump = json.dumps(master)
            if self.use_appid_topics:
                self.kafka_conn.ensure_topic_exists(appid_topic)
                self.producer.send_messages(appid_topic, dump)
            self.producer.send_messages(firehose_topic, dump)
            return True
        except Exception as ex:
            message = "An exception '{0}' occurred while sending a message " \
                      "to kafka. Arguments:\n{1!r}" \
                      .format(type(ex).__name__, ex.args)
            self.logger.error(message)
            return False
def __init__(self): self.client = KafkaClient("localhost", 9092) self.producers = {}
app = Flask(__name__)

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.info('Starting Monitor Server Python')

# load json schema
with open('schemas/atmosphere_tma-m_schema.json') as f:
    tma_m_schema = json.load(f)
logger.debug('Schema loaded %s', tma_m_schema)
validator = Draft4Validator(tma_m_schema)
logger.info('Validator initialized %s', validator)

# Connect Kafka client to Kafka pod
kafka = KafkaClient('kafka-0.kafka-hs.default.svc.cluster.local:9093')
# Initialize producer structure to send messages
producer = SimpleProducer(kafka)

@app.route('/monitor', methods=['POST'])
def process_message():
    # load json file
    input = request.get_json(force=True)
    logger.info('Processing Request %s', input)
    return validate_schema(input)

@app.route('/monitor_demo', methods=['POST'])
class Aria2Dispatcher:
    def __init__(self, host, topic, consumer_id, settings):
        self.host = host
        self.topic = topic
        self.consumer_id = consumer_id or "Aria2Dispatcher"
        self.settings = importlib.import_module(settings[:-3])
        self.kafka_client = KafkaClient(self.settings.KAFKA_HOSTS)
        self.producer = SimpleProducer(self.kafka_client)
        self.topic_prefix = self.settings.KAFKA_TOPIC_PREFIX
        self.topic_list = []
        self.aria2_clients = []
        for x in self.settings.ARIA2_ADDRESSES:
            rpc_uri = "ws://%s/jsonrpc" % x
            try:
                aria2_connection = create_connection(rpc_uri)
                self.aria2_clients.append({
                    'rpc_uri': rpc_uri,
                    'ws': aria2_connection
                })
            except:
                logger.error('create aria2_connection error!')
                raise

    def _process_item(self, item, aria2_client_index):
        prefix = self.topic_prefix
        crawled_firehose_images_topic = "{prefix}.crawled_firehose_images".format(
            prefix=prefix)
        if 'updates' in item['meta']['collection_name']:
            message = json.dumps(item)
            print("in..... if 'updates' in item['meta']['collection_name']:")
            print('collection_name::', item['meta']['collection_name'])
        else:
            self._process_item_images(item, aria2_client_index)
            try:
                if 'images' in item and len(item['images']) > 0:
                    message = json.dumps(item)
                else:
                    message = 'no images.'
            except:
                message = 'json failed to parse'
                logger.error(message)
        self._check_topic(crawled_firehose_images_topic)
        self.producer.send_messages(crawled_firehose_images_topic, message)
        logger.info("send message to kafka topic:: %s " % crawled_firehose_images_topic)
        logger.info("message= %s" % message)

    def _process_item_images(self, item, aria2_client_index):
        image_urls = item["image_urls"]
        if len(image_urls) > 0:
            req_methods = []
            images = []
            for url in image_urls:
                filename, file_ext = splitext(basename(urlparse(url).path))
                if len(file_ext) == 0:
                    file_ext = ".jpg"
                out_file_name_base = sha1(url)
                out_file_name = "%s%s" % (out_file_name_base, file_ext)
                dir_name = '%s/%s/%s/%s/%s' % (
                    self.settings.IMAGES_STORE,
                    item['meta']['spiderid'],
                    out_file_name_base[:3],
                    out_file_name_base[3:6],
                    out_file_name_base[6:])
                options = dict(dir=dir_name, out=out_file_name)
                if not exists(dir_name + '/' + out_file_name):
                    req_methods.append({
                        "methodName": "aria2.addUri",
                        "params": [[url], options]
                    })
                    images.append({
                        'url': url,
                        'path': "%s/%s" % (dir_name, out_file_name),
                        'aria2': {
                            'rpc_uri': self.aria2_clients[aria2_client_index]['rpc_uri']
                        }
                    })
            req = {
                "jsonrpc": 2,
                "id": str(uuid.uuid1()),
                "method": "system.multicall",
                "params": [req_methods]
            }
            jsonreq = json.dumps(req)
            try:
                self.aria2_clients[aria2_client_index]['ws'].send(jsonreq)
                resp = self.aria2_clients[aria2_client_index]['ws'].recv()
                ws_resp = json.loads(resp)
                print('resp:', resp)
                logger.info('resp:: %s ' % resp)
                for image, gid in zip(images, map(lambda x: x[0], ws_resp['result'])):
                    image['aria2']['gid'] = gid
            except Exception as err:
                print('error::', err)
                logger.error(err)
            item['images'] = images

    def _check_topic(self, topic_name):
        if topic_name not in self.topic_list:
            self.kafka_client.ensure_topic_exists(topic_name)
            self.topic_list.append(topic_name)

    def dispatch(self):
        consumer = SimpleConsumer(
            self.kafka_client,
            self.consumer_id,
            self.topic,
            buffer_size=1024 * 100,       # 100kb
            fetch_size_bytes=1024 * 100,  # 100kb
            max_buffer_size=None          # eliminate big message errors
        )
        consumer.seek(0, 1)
        i = 0
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ' message is None:'
                    logger.info('message is None.')
                    time.sleep(1)
                    continue
                val = message.message.value
                try:
                    item = json.loads(val)
                    i += 1
                    self._process_item(item, i % len(self.aria2_clients))
                except:
                    print("error happened in loads val to process : %s" % val)
                    logger.error("error happened in loads val to process: %s" % val)
                    continue
            except:
                traceback.print_exc()
                break
        self.kafka_client.close()
        return 0
def startConnection(self):
    idPlusPort = self.IP + ":" + str(self.port)
    kafka = KafkaClient(idPlusPort)
    self.producer = SimpleProducer(kafka, async=True)
import json, sys, os
from kafka import SimpleProducer, KafkaClient
import pika

sample = {'imageMatch': 'true', 'fileName': 'thilagaTest'}

kafka = KafkaClient('10.6.4.36:9092')
producer = SimpleProducer(kafka)
data = json.dumps(sample)
producer.send_messages(b'geoAttendance', data)

credentials = pika.PlainCredentials('guest', 'guest')
parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
channel.queue_declare(queue='hello')
channel.basic_publish(exchange='', routing_key='hello', body=data)
print(" [x] Sent 'done.....!!!'")
connection.close()
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from kafka import SimpleProducer, KafkaClient

access_token = "1072837803908784129-Pv2y0HZlUcJVyHmePC5KtLIVM0ZHz6"
access_token_secret = "ImJh7Q2oMhHoHyWsln2BjMFWouYQCliPik9BkE1wymk6H"
consumer_key = "WFRKqgnMYAIXB1NkOhvttsNMY"
consumer_secret = "X4zj67WfzYt0paiqOpukFW7wb4wWeazttwW98GNmvEyLW9tvrA"

class StdOutListener(StreamListener):
    def on_data(self, data):
        producer.send_messages("trump", data.encode('utf-8'))
        print(data)
        return True

    def on_error(self, status):
        print(status)

kafka = KafkaClient("localhost:9092")
producer = SimpleProducer(kafka)
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
stream.filter(track="trump")
from kafka import KafkaConsumer

group_name = "my-group"
topic_name = "fast-messages"

# KafkaConsumer takes topic names plus bootstrap_servers, not a KafkaClient
# instance, so the client object in the original was both unusable and unused.
consumer = KafkaConsumer(topic_name, group_id=group_name,
                         bootstrap_servers='127.0.0.1:9092')

print "Created consumer for group: [%s] and topic: [%s]" % (group_name, topic_name)
print "Waiting for messages..."

for msg in consumer:
    print msg
def getProducer(self):
    if self.producer is None:
        kafka = KafkaClient(kafka_server)
        self.producer = SimpleProducer(kafka)
    return self.producer
data= {"srDetails": {"betaType": "PythonTesting", "build": "PythonTesting", "caseId": "9999-9999-P-9988", "ccEngineer": "PythonTesting", "ccList": "PythonTesting", "contractId": "PythonTesting", "contractStatus": "PythonTesting", "country": "PythonTesting", "courtesyDescription": "PythonTesting", "courtesykey": "PythonTesting", "criticalIssue": "PythonTesting", "criticalOutage": "PythonTesting", "customerCaseNumber": "PythonTesting", "cve": "PythonTesting", "cvss": "PythonTesting", "description": "PythonTesting", "employeeEmail": "PythonTesting", "employeeId": "PythonTesting", "endDate": "PythonTesting", "entitledSerialNumber": "PythonTesting", "entitlementChecked": "PythonTesting", "entitlementServiceLevel": "PythonTesting", "entitlementSource": "PythonTesting", "escalation": "PythonTesting", "escalationLevelDescription": "PythonTesting", "escalationLevelKey": "PythonTesting", "escalationkey": "PythonTesting", "externallyReported": "PythonTesting", "followupMethod": "PythonTesting", "followupMethodKey": "PythonTesting", "internalUse": "PythonTesting", "jsaAdvisoryBoard": "PythonTesting", "jtac": "PythonTesting", "knowledgeArticle": "PythonTesting", "numberOfSystemsAffected": "PythonTesting", "numberOfUsersAffected": "PythonTesting", "ouatgeCauseDescription": "PythonTesting", "outageCauseKey": "PythonTesting", "outageDescription": "PythonTesting", "outageImpactDescription": "PythonTesting", "outageImpactKey": "PythonTesting", "outageInfoAvailable": "PythonTesting", "outageKey": "PythonTesting", "outageTypeDescription": "PythonTesting", "outageTypeKey": "PythonTesting", "outsourcer": "PythonTesting", "overideOutage": "PythonTesting", "partnerFunction": [{"partnerFunctionKey": "00000001", "partnerFunctionName": "Sold-To Party", "partnerId": "100000151", "partnerName": "CENTURYLINK, INC"}, {"partnerFunctionName":"Employee Responsible","partnerFunctionKey":"00000014","partnerId":"0000018961","partnerName":"Vidhya Sadasivam"}], "platform": "PythonTesting", "previousOwnerSkill": "PythonTesting", "previousTeam": "PythonTesting", "priority": "PythonTesting", "priorityKey": "PythonTesting", "processType": "PythonTesting", "processTypeDescription": "PythonTesting", "productId": "PythonTesting", "productSeries": "PythonTesting", "raFa": "PythonTesting", "reason": "PythonTesting", "release": "PythonTesting", "reporterDetails": "PythonTesting", "routerName": "PythonTesting", "secVulnerability": "PythonTesting", "serialNumber": "PythonTesting", "serviceProduct": "PythonTesting", "severity": "PythonTesting", "severityKey": "PythonTesting", "sirtBundle": "PythonTesting", "sku": "PythonTesting", "smeContact": "PythonTesting", "software": "PythonTesting", "specialRelease": "PythonTesting", "srCategory1": "PythonTesting", "srCategory2": "PythonTesting", "srCategory3": "PythonTesting", "srCategory4": "PythonTesting", "srReqDate": [{"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", 
"timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}], "startDate": "PythonTesting", "status": "PythonTesting", "statusKey": "PythonTesting", "technicalCategory1": "PythonTesting", "technicalCategory2": "PythonTesting", "technicalCategory3": "PythonTesting", "technicalCategory4": "PythonTesting", "temperature": "PythonTesting", "theaterDescription": "PythonTesting", "theaterKey": "PythonTesting", "top5": "PythonTesting", "totalOutageTime": "PythonTesting", "urgency": "PythonTesting", "urgencyKey": "PythonTesting", "version": "PythonTesting", "viaDescription": "PythonTesting", "viaKey": "PythonTesting", "warrantyEndDate": "PythonTesting", "yearRoundSupport": "PythonTesting", "zzQ1": "PythonTesting", "zzQ10": "PythonTesting", "zzQ2": "PythonTesting", "zzQ3": "PythonTesting", "zzQ4": "PythonTesting", "zzQ5": "PythonTesting", "zzQ6": "PythonTesting", "zzQ7": "PythonTesting", "zzQ8": "PythonTesting", "zzQ9": "PythonTesting"}} att = dict(srAttachements={"caseId": "2015-1004-T-0021", "attachment": {"createdBy": "CMUSER", "dateCreated": "Mon+Oct+12+18%3A44%3A20+UTC+2015", "fileType": "", "path": "/archive/attachments/OETCLR/2015/10/04/T/20151012184420", "private": "", "sequenceNumber": "0002141498", "size": 19, "title": "Closed_JSA_Cases.xlsx", "uploadedBy": "*****@*****.**", "zDate": 20151012, "zTime": 114422}}) att2 = dict(srAttachements={"caseId": "9999-9999-P-9993", "attachment": {"sequenceNumber": "0000026175", "title": "DK7444.txt", "zTime": 73411, "fileType": "", "private": "", "dateCreated": "20110525 073411", "createdBy": "", "path": "/archive/attachments/PCLR/2011/05/25/0334/1306334051497", "zDate": 20110525, "uploadedBy": "", "size": 8}}) # To send messages synchronously kafka = KafkaClient('172.22.147.232:9092,172.22.147.242:9092,172.22.147.243:9092') producer = SimpleProducer(kafka) # Note that the application is responsible for encoding messages to type bytes print producer.send_messages(b'SAPEvent', json.dumps(sr)) time.sleep(1) print "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" try: document = sr row = [] utils = Utils() row = utils.validate_sr_details( document['srDetails'], row ) except Exception: print Exception.message print(traceback.format_exc()) print "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
def get_lock(process_name):
    global lock_socket
    lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
    try:
        lock_socket.bind('\0' + process_name)
        print 'Lock acquired'
    except socket.error:
        print 'Process already running. Exiting..'
        sys.exit()

get_lock('twitter streaming')

logging.basicConfig()
kafka = KafkaClient("localhost:9092")

tempfile_path = None
tempfile = None
batch_counter = 0
timestamp = None

# def get_topics(zookeeper_hosts, topic_regex):
#     """Uses shell zookeeper-client to read Kafka topics matching topic_regex from ZooKeeper."""
#     command = "/usr/bin/zookeeper-client -server %s ls /brokers/topics | tail -n 1 | tr '[],' ' '" % ','.join(zookeeper_hosts)
#     topics = os.popen(command).read().strip().split()
#     matched_topics = [topic for topic in topics if re.match(topic_regex, topic)]
#     return matched_topics

def standardized_timestamp(frequency, dt=None):
    '''
self._logger.info("%s" % messag) return False else: self._callback(self._partno, chg) return True if __name__ == '__main__': logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') workers = {} brokers = "localhost:9092,localhost:9093,localhost:9094" group = "workers" kafka = KafkaClient(brokers, str(os.getpid())) cons = SimpleConsumer(kafka, group, "ctrl") cons.provide_partition_info() print "Starting control" end_ready = False while end_ready == False: try: while True: part, mmm = cons.get_message(timeout=None) mm = mmm.message print "Consumed ctrl " + str(mm) if mm.value == "start": if workers.has_key(mm.key): print "Dup partition %s" % mm.key raise ValueError else:
def init_kafka():
    global kafkaProducer
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
def main():
    # initial main parser setup
    parser = argparse.ArgumentParser(
        description='Kafka Dump: Scrapy Cluster Kafka topic dump utility for '
                    'debugging.',
        add_help=False)
    parser.add_argument('-h', '--help', action=ArgparseHelper,
                        help='show this help message and exit')
    subparsers = parser.add_subparsers(help='commands', dest='command')

    # args to use for all commands
    base_parser = argparse.ArgumentParser(add_help=False)
    base_parser.add_argument('-kh', '--kafka-host', action='store',
                             required=False,
                             help="The override Kafka host")
    base_parser.add_argument('-s', '--settings', action='store',
                             required=False,
                             help="The settings file to read from",
                             default="localsettings.py")
    base_parser.add_argument('-ll', '--log-level', action='store',
                             required=False, help="The log level",
                             default=None,
                             choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])

    # list command
    list_parser = subparsers.add_parser('list', help='List all Kafka topics',
                                        parents=[base_parser])

    # dump command
    dump_parser = subparsers.add_parser('dump', help='Dump a Kafka topic',
                                        parents=[base_parser])
    dump_parser.add_argument('-t', '--topic', action='store', required=True,
                             help="The Kafka topic to read from")
    dump_parser.add_argument('-c', '--consumer', action='store',
                             required=False, default=None,
                             help="The Kafka consumer id to use")
    dump_parser.add_argument('-b', '--from-beginning', action='store_const',
                             required=False, const=True,
                             help="Read the topic from the beginning")
    dump_parser.add_argument('-nb', '--no-body', action='store_const',
                             required=False, const=True, default=False,
                             help="Do not include the raw html 'body' key in"
                                  " the json dump of the topic")
    dump_parser.add_argument('-p', '--pretty', action='store_const',
                             required=False, const=True, default=False,
                             help="Pretty print the json objects consumed")
    dump_parser.add_argument('-d', '--decode-base64', action='store_const',
                             required=False, const=True, default=False,
                             help="Decode the base64 encoded raw html body")

    args = vars(parser.parse_args())

    wrapper = SettingsWrapper()
    settings = wrapper.load(args['settings'])

    kafka_host = args['kafka_host'] if args['kafka_host'] else settings['KAFKA_HOSTS']
    log_level = args['log_level'] if args['log_level'] else settings['LOG_LEVEL']
    logger = LogFactory.get_instance(level=log_level, name='kafkadump')

    if args['command'] == 'list':
        try:
            logger.debug("Connecting to {0}...".format(kafka_host))
            kafka = KafkaClient(kafka_host)
            logger.info("Connected to {0}".format(kafka_host))
        except KafkaUnavailableError as ex:
            message = "An exception '{0}' occurred. Arguments:\n{1!r}" \
                .format(type(ex).__name__, ex.args)
            logger.error(message)
            sys.exit(1)
        logger.debug('Running list command')
        print("Topics:")
        for topic in list(kafka.topic_partitions.keys()):
            print("-", topic)
        kafka.close()
        return 0
    elif args['command'] == 'dump':
        logger.debug('Running dump command')
        topic = args["topic"]
        consumer_id = args["consumer"]

        try:
            logger.debug("Getting Kafka consumer")
            offset = 'earliest' if args["from_beginning"] else 'latest'
            consumer = KafkaConsumer(
                topic,
                group_id=consumer_id,
                bootstrap_servers=kafka_host,
                consumer_timeout_ms=settings['KAFKA_CONSUMER_TIMEOUT'],
                auto_offset_reset=offset,
                auto_commit_interval_ms=settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
                enable_auto_commit=settings['KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
                max_partition_fetch_bytes=settings['KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
        except NoBrokersAvailable as ex:
            logger.error('Unable to connect to Kafka')
            sys.exit(1)

        num_records = 0
        total_bytes = 0
        item = None
        # open the dump file once, instead of re-opening (and truncating) it
        # for every message as the original code did
        file1 = open("myfile.txt", "w")
        while True:
            try:
                for message in consumer:
                    if message is None:
                        logger.debug("no message")
                        break
                    logger.debug("Received message")
                    val = message.value
                    try:
                        item = json.loads(val)
                        if args['decode_base64'] and 'body' in item:
                            item['body'] = base64.b64decode(item['body'])
                        if args['no_body'] and 'body' in item:
                            del item['body']
                    except ValueError:
                        logger.info("Message is not a JSON object")
                        item = val
                    body_bytes = len(item)
                    if args['pretty']:
                        print(json.dumps(item, indent=4))
                    else:
                        print(item)
                    file1.write(str(item))
                    num_records = num_records + 1
                    total_bytes = total_bytes + body_bytes
            except KeyboardInterrupt:
                logger.debug("Keyboard interrupt received")
                break
            except:
                logger.error(traceback.print_exc())
                break
        file1.close()

        total_mbs = old_div(float(total_bytes), (1024 * 1024))
        if item is not None:
            print("Last item:")
            print(json.dumps(item, indent=4))
        if num_records > 0:
            logger.info(
                "Num Records: {n}, Total MBs: {m}, kb per message: {kb}".format(
                    n=num_records, m=total_mbs,
                    kb=(float(total_bytes) / num_records / 1024)))
        else:
            logger.info("No records consumed")
            num_records = 0

        logger.info("Closing Kafka connection")
        try:
            consumer.close()
        except:
            # Exception is thrown when group_id is None.
            # See https://github.com/dpkp/kafka-python/issues/619
            pass
        return 0
def __init__(self):
    client = KafkaClient("localhost:9092")
    self.producer = SimpleProducer(client, async=True,
                                   batch_send_every_n=1000,
                                   batch_send_every_t=10)
class EpidataStreamingContext:
    def __init__(self, sc=None, ssc=None, sql_ctx=None, topics=None,
                 brokers=None, cassandra_conf=None, measurement_class=None):
        self._sc = sc
        self._sql_ctx = sql_ctx
        self._topics = topics
        self._ssc = ssc
        self._brokers = brokers
        self._cassandra_conf = cassandra_conf
        self._measurement_class = measurement_class

        # set up schemas
        self._sensor_measurement_schema = SensorMeasurement.get_schema()
        self._sensor_measurement_stats_schema = SensorMeasurement.get_stats_schema()
        self._automated_test_schema = AutomatedTest.get_schema()
        self._automated_test_stats_schema = AutomatedTest.get_stats_schema()

        self._kafka_producer = KafkaProducer(bootstrap_servers=self._brokers)
        self._client = KafkaClient(self._brokers)

    def run_stream(self, ops, clean_up=True):
        self._client.ensure_topic_exists(self._topics)
        kvs = KafkaUtils.createDirectStream(
            self._ssc, [self._topics], {"metadata.broker.list": self._brokers})

        if self._measurement_class == "sensor_measurement":
            rows = kvs.map(SensorMeasurement.to_row)
        elif self._measurement_class == "automated_test":
            rows = kvs.map(AutomatedTest.to_row)

        def process(time, rdd):
            if rdd.isEmpty() == False:
                rdd_df = self._sql_ctx.createDataFrame(rdd)

                # convert to pandas dataframe
                panda_df = ConvertUtils.convert_to_pandas_dataframe_model(
                    rdd_df, clean_up)

                # perform all transformations and save to cassandra or kafka
                for op in ops:
                    # try:
                    # apply transformation
                    output_df = op.apply(panda_df, self._sql_ctx)
                    if not output_df.empty:
                        if op.datastore() == "cassandra":
                            # clean up unnecessary column
                            output_df = ConvertUtils.convert_meas_value(
                                output_df, op.destination())

                            # convert it back to a spark data frame
                            spark_output_df = self._sql_ctx.createDataFrame(
                                output_df, self._get_schema(op.destination()))

                            # convert to db model to save to cassandra
                            output_df_db = self._convert_to_db_model(
                                spark_output_df, op.destination())

                            # save to cassandra
                            output_df_db.write.format(
                                "org.apache.spark.sql.cassandra"
                            ).mode('append').options(
                                table=op.destination(),
                                keyspace=self._cassandra_conf['keyspace'],
                                user=self._cassandra_conf['user'],
                                password=self._cassandra_conf['password']
                            ).save()
                        elif op.datastore() == "kafka":
                            output_df_kafka = output_df
                            for i in output_df_kafka.index:
                                row_json = output_df_kafka.loc[i].to_json()
                                # push to kafka
                                self._kafka_producer.send(op.destination(), row_json)
                            # flush kafka producer
                            self._kafka_producer.flush()
                    # except BaseException:
                    #     print("Failed transformation: " + op.destination())

        rows.foreachRDD(process)

    def _start(self):
        self._ssc.start()
        self._ssc.awaitTermination()

    def _get_schema(self, destination):
        if destination == "measurements_summary":
            if self._measurement_class == "sensor_measurement":
                return self._sensor_measurement_stats_schema
            elif self._measurement_class == "automated_test":
                return self._automated_test_stats_schema
        else:
            if self._measurement_class == "sensor_measurement":
                return self._sensor_measurement_schema
            elif self._measurement_class == "automated_test":
                return self._automated_test_schema

    def _convert_to_db_model(self, input_df, dest):
        if self._measurement_class == "sensor_measurement":
            return SensorMeasurement.convert_to_db_model(input_df, dest)
        elif self._measurement_class == "automated_test":
            return AutomatedTest.convert_to_db_model(input_df, dest)
def start_kafka(zk_client_port, broker_listen_port, broker_id=0):
    if not os.path.exists(kafka_bdir):
        output, _ = call_command_("mkdir " + kafka_bdir)
    kafka_download = 'wget -P ' + kafka_bdir + \
        ' http://download.nextag.com/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz'
    if not os.path.exists(kafka_bdir + '/kafka_2.9.2-0.8.1.1.tgz'):
        process = subprocess.Popen(kafka_download.split(' '))
        process.wait()
        if process.returncode != 0:
            return False

    basefile = 'kafka_2.9.2-0.8.1.1'
    kafkabase = "/tmp/kafka.%s.%d/" % (os.getenv('USER', 'None'), broker_listen_port)
    confdir = kafkabase + basefile + "/config/"
    output, _ = call_command_("rm -rf " + kafkabase)
    output, _ = call_command_("mkdir " + kafkabase)

    logging.info('Installing kafka in ' + kafkabase)
    os.system("cat " + kafka_bdir + '/kafka_2.9.2-0.8.1.1.tgz' +
              " | tar -xpzf - -C " + kafkabase)

    logging.info('kafka Port %d' % broker_listen_port)

    # Replace the broker id, port, and zookeeper settings in the config file
    replace_string_(
        confdir + "server.properties",
        [("broker.id=0", "broker.id=" + str(broker_id)),
         ("port=9092", "port=" + str(broker_listen_port)),
         ("zookeeper.connect=localhost:2181",
          "zookeeper.connect=localhost:%d" % zk_client_port),
         ("log.dirs=/tmp/kafka-logs", "log.dirs=" + kafkabase + "logs")])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("grep -v grep", "grep %s | grep -v grep" % kafkabase)])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("SIGINT", "SIGKILL")])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("#!/bin/sh", "#!/bin/sh -x")])
    output, _ = call_command_("chmod +x " + kafkabase + basefile +
                              "/bin/kafka-server-stop.sh")

    # Extra options for JMX : -Djava.net.preferIPv4Stack=true -Djava.rmi.server.hostname=xx.xx.xx.xx
    output, _ = call_command_(kafkabase + basefile +
                              "/bin/kafka-server-start.sh -daemon " +
                              kafkabase + basefile + "/config/server.properties")

    count = 0
    start_wait = os.getenv('CONTRIAL_ANALYTICS_TEST_MAX_START_WAIT_TIME', 15)
    while count < start_wait:
        try:
            logging.info('Trying to connect...')
            kk = KafkaClient("localhost:%d" % broker_listen_port)
        except:
            count += 1
            time.sleep(1)
        else:
            return True

    logging.info("Kafka client cannot connect. Kafka logfile below:")
    with open(kafkabase + basefile + "/logs/kafkaServer.out", 'r') as fin:
        logging.info(fin.read())
    return False
def check(self, instance):
    consumer_groups = self.read_config(instance, 'consumer_groups',
                                       cast=self._validate_consumer_groups)
    zk_connect_str = self.read_config(instance, 'zk_connect_str')
    kafka_host_ports = self.read_config(instance, 'kafka_connect_str')

    # Construct the Zookeeper path pattern
    zk_prefix = instance.get('zk_prefix', '')
    zk_path_tmpl = zk_prefix + '/consumers/%s/offsets/%s/%s'

    # Connect to Zookeeper
    zk_conn = KazooClient(zk_connect_str, timeout=self.zk_timeout)
    zk_conn.start()

    try:
        # Query Zookeeper for consumer offsets
        consumer_offsets = {}
        topics = defaultdict(set)
        for consumer_group, topic_partitions in consumer_groups.iteritems():
            for topic, partitions in topic_partitions.iteritems():
                # Remember the topic partitions that we've seen so that we can
                # look up their broker offsets later
                topics[topic].update(set(partitions))
                for partition in partitions:
                    zk_path = zk_path_tmpl % (consumer_group, topic, partition)
                    try:
                        consumer_offset = int(zk_conn.get(zk_path)[0])
                        key = (consumer_group, topic, partition)
                        consumer_offsets[key] = consumer_offset
                    except NoNodeError:
                        self.log.warn('No zookeeper node at %s' % zk_path)
                    except Exception:
                        self.log.exception('Could not read consumer offset from %s' % zk_path)
    finally:
        try:
            zk_conn.stop()
            zk_conn.close()
        except Exception:
            self.log.exception('Error cleaning up Zookeeper connection')

    # Connect to Kafka
    kafka_conn = KafkaClient(kafka_host_ports, timeout=self.kafka_timeout)

    try:
        # Query Kafka for the broker offsets
        broker_offsets = {}
        for topic, partitions in topics.items():
            offset_responses = kafka_conn.send_offset_request([
                OffsetRequest(topic, p, -1, 1) for p in partitions])

            for resp in offset_responses:
                broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
    finally:
        try:
            kafka_conn.close()
        except Exception:
            self.log.exception('Error cleaning up Kafka connection')

    # Report the broker data
    for (topic, partition), broker_offset in broker_offsets.items():
        broker_tags = ['topic:%s' % topic, 'partition:%s' % partition]
        broker_offset = broker_offsets.get((topic, partition))
        self.gauge('kafka.broker_offset', broker_offset, tags=broker_tags)

    # Report the consumer data
    for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():
        # Get the broker offset
        broker_offset = broker_offsets.get((topic, partition))

        # Report the consumer offset and lag
        tags = ['topic:%s' % topic, 'partition:%s' % partition,
                'consumer_group:%s' % consumer_group]
        self.gauge('kafka.consumer_offset', consumer_offset, tags=tags)
        self.gauge('kafka.consumer_lag', broker_offset - consumer_offset, tags=tags)
from kafka import KafkaClient, SimpleProducer, SimpleConsumer

# To send messages synchronously
kafka = KafkaClient("cloud.soumet.com:9092")
producer = SimpleProducer(kafka)

# Note that the application is responsible for encoding messages to type str
#producer.send_messages("bitcoin", "some message")

consumer = SimpleConsumer(kafka, "consumer", "bitcoin_exchange_tmp",
                          max_buffer_size=1310720000)
for message in consumer:
    # message is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.decode('utf-8')`
    print(message)

#kafka.close()
def __init__(self, kafkaBroker, kafkaTopic):
    self.broker = kafkaBroker
    self.topic = kafkaTopic
    self.client = KafkaClient(self.broker)
    self.producer = SimpleProducer(self.client)
####################################################
# My own access tokens
####################################################
ACCESS_TOKEN = '28778811-sw3jVlgjtS14kvquuo765rjaIYvCE0iMpTsDXdiRs'
ACCESS_SECRET = 'HBGjT0uixYSC6PXvyewvBuFmHv4FYtU6UmsDG98khY'
CONSUMER_KEY = '2VsrZwlSbtToGYbpHe42hmB36'
CONSUMER_SECRET = 'vuXhfCmMVMwecUzV3hwK8vvkGWZnAM5wtEDvzMMenq6rH8yFqe'

my_auth = requests_oauthlib.OAuth1(CONSUMER_KEY, CONSUMER_SECRET,
                                   ACCESS_TOKEN, ACCESS_SECRET)

####################################################
# Kafka Producer
####################################################
twitter_topic = "twitter_topic"
client = KafkaClient("10.128.0.2:9092")
producer = SimpleProducer(client)

def get_tweets():
    print("#########################get_tweets called################################")
    url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    #query_data = [('language', 'en'), ('locations', '-130,-20,100,50'), ('track', '#')]
    #query_data = [('language', 'en'), ('locations', '-3.7834,40.3735,-3.6233,40.4702'), ('track', '#')]
    query_data = [('language', 'en'),
                  ('locations', '-3.7834,40.3735,-3.6233,40.4702'),
                  ('track', 'Madrid')]
    query_url = url + '?' + '&'.join([str(t[0]) + '=' + str(t[1]) for t in query_data])
from kafka import KafkaClient, SimpleProducer, SimpleConsumer

# KafkaClient talks to a broker, not ZooKeeper; 2181 is the ZooKeeper port,
# so connect to the broker port (9092 by default) instead.
kafka = KafkaClient("localhost:9092")
producer = SimpleProducer(kafka)
producer.send_messages("test1", b"Hello world!")
def __init__(self, api):
    self.api = api
    super(StreamListener, self).__init__()
    # the old KafkaClient instance in the original was unused; KafkaProducer
    # manages its own connection (broker address assumed to be localhost:9092)
    self.producer = KafkaProducer(
        bootstrap_servers='localhost:9092',
        value_serializer=lambda m: json.dumps(m).encode('utf-8'))
import argparse
import json
import time

import streaming_generator
from kafka import SimpleProducer, KafkaClient

def timed_call(fn, calls_per_second, *args, **kwargs):
    start = time.time()
    fn(*args, **kwargs)
    fn_time = time.time() - start
    sleep_duration = max(0, (1.0 - calls_per_second * fn_time) / calls_per_second)
    print sleep_duration
    while True:
        fn(*args, **kwargs)
        time.sleep(sleep_duration)

def send_message(producer, topic):
    message_raw = streaming_generator.gen_random_message()
    producer.send_messages(topic, json.dumps({'user_id': message_raw[0],
                                              'activity': message_raw[1]}))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--messages', default=1000)
    parser.add_argument('-z', '--host', default="127.0.0.1:9092")
    parser.add_argument('-t', '--topic', default='messages')
    args = parser.parse_args()

    kafka = KafkaClient(args.host)
    producer = SimpleProducer(kafka)
    timed_call(send_message, args.messages, producer, args.topic)
from kafka import SimpleProducer, KafkaClient
from kafka.common import LeaderNotAvailableError  # needed for the retry below
import sys
import csv
import json
import time
import datetime
import re

# To send messages synchronously
#kafka = KafkaClient('slc08use.us.oracle.com:9092')
kafka = KafkaClient('localhost:9092')
producer = SimpleProducer(kafka)

topic = 'test0730'
table = 'taxi_200K'
lines = [x.strip() for x in open(table + '.csv').readlines()]

print 'create external table', table, '('
columns = lines[0].split('|')
print ',\n'.join([' ' + col + ' string' for col in columns])
print ')'
print 'ROW FORMAT DELIMITED FIELDS TERMINATED BY "|"'
print "location '/user/jiezhen/camus/topics/test0730/daily/2015/08/04';"

for line in lines[1:]:
    if line:
        try:
            producer.send_messages(bytes(topic), line)
        except LeaderNotAvailableError:
            print "pausing to allow Kafka time to create topic"
            time.sleep(1)
def __init__(self):
    self._brokers = APP_CONFIG["rti_kafka"]["brokers"]
    self._partitions = APP_CONFIG["rti_kafka"]["partitions"]
    self._topic = APP_CONFIG["rti_kafka"]["topic"]
    self._kafka = KafkaClient(self._brokers)
    self.producer = None
from kafka import KafkaProducer, KafkaConsumer
from kafka import KafkaClient, SimpleConsumer
import time
import sys
from app import app
from flask import Flask, render_template, request, redirect, Response
import random, json

SEND_TIME = None
print("At top of function \n\n")
RECEIVE_TIME = None

bs = ['54.218.73.149:9092', '50.112.197.74:9092', '34.222.135.111:9092']
PRODUCER = KafkaProducer(bootstrap_servers=bs)
CLIENT = KafkaClient(bs)

@app.route('/')
def home():
    return render_template('setuser.html')

@app.route('/<user>')
def serve_user(user):
    consumer = SimpleConsumer(CLIENT, 'testing', 'user{}_sess{}'.format(user, user))
    msg = None
    msg = consumer.get_message()
    RECEIVE_TIME = time.time()
    color = 'yellow'
from kafka import SimpleProducer, KafkaClient
from kafka import KafkaConsumer
import logging
import sys

logging.basicConfig(
    format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
    level=logging.WARNING)

server = "ec2-54-171-154-70.eu-west-1.compute.amazonaws.com"
kafka = KafkaClient('%s:9092' % server)
producer = SimpleProducer(kafka)

# To consume messages
consumer = KafkaConsumer('rousseau',
                         group_id='my_group',
                         bootstrap_servers=['%s:9092' % server])

# Note that the application is responsible for encoding messages to type bytes
producer.send_messages(b'rousseau', sys.argv[1])

for message in consumer:
    # message value is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s" % message.value)
    break
def start_kafka(zk_client_port, broker_listen_port, broker_id=0):
    if not os.path.exists(kafka_bdir):
        output, _ = call_command_("mkdir " + kafka_bdir)
    kafka_download = 'wget -nv --tries=3 -c -O ' + kafka_bdir + kafka_dl + \
        ' https://github.com/Juniper/contrail-third-party-cache/blob/master/kafka' + \
        kafka_dl + '?raw=true'
    if not os.path.exists(kafka_bdir + kafka_dl):
        process = subprocess.Popen(kafka_download.split(' '))
        process.wait()
        if process.returncode != 0:
            return False

    basefile = kafka_version
    kafkabase = "/tmp/kafka.%s.%d/" % (os.getenv('USER', 'None'), broker_listen_port)
    confdir = kafkabase + basefile + "/config/"
    output, _ = call_command_("rm -rf " + kafkabase)
    output, _ = call_command_("mkdir " + kafkabase)

    logging.info('Check zookeeper in %d' % zk_client_port)
    zk = KazooClient(hosts='127.0.0.1:' + str(zk_client_port), timeout=60.0)
    try:
        zk.start()
        zk.delete("/brokers", recursive=True)
        zk.delete("/consumers", recursive=True)
        zk.delete("/controller", recursive=True)
    except:
        logging.info("Zookeeper client cannot connect")
        zk.stop()
        return False
    zk.stop()

    logging.info('Installing kafka in ' + kafkabase)
    x = os.system("cat " + kafka_bdir + kafka_dl + " | tar -xpzf - -C " + kafkabase)
    if 0 != x:
        logging.error("Cannot install kafka")
        return False

    logging.info('kafka Port %d' % broker_listen_port)
    replace_string_(confdir + "server.properties",
                    [("#listeners=PLAINTEXT://:9092",
                      "listeners=PLAINTEXT://:" + str(broker_listen_port))])

    # Replace the broker id and zookeeper settings in the config file
    replace_string_(
        confdir + "server.properties",
        [("broker.id=0", "broker.id=" + str(broker_id)),
         ("zookeeper.connect=localhost:2181",
          "zookeeper.connect=localhost:%d" % zk_client_port),
         ("log.dirs=/tmp/kafka-logs", "log.dirs=" + kafkabase + "logs")])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("grep -v grep", "grep %s | grep -v grep" % kafkabase)])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("SIGINT", "SIGKILL")])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("#!/bin/sh", "#!/bin/sh -x")])
    output, _ = call_command_("chmod +x " + kafkabase + basefile +
                              "/bin/kafka-server-stop.sh")

    # Extra options for JMX : -Djava.net.preferIPv4Stack=true -Djava.rmi.server.hostname=xx.xx.xx.xx
    output, _ = call_command_(kafkabase + basefile +
                              "/bin/kafka-server-start.sh -daemon " +
                              kafkabase + basefile + "/config/server.properties")

    count = 0
    start_wait = os.getenv('CONTRIAL_ANALYTICS_TEST_MAX_START_WAIT_TIME', 15)
    while count < start_wait:
        try:
            logging.info('Trying to connect...')
            kk = KafkaClient("localhost:%d" % broker_listen_port)
        except:
            count += 1
            time.sleep(1)
        else:
            return True

    logging.info("Kafka client cannot connect. Kafka logfile below:")
    with open(kafkabase + basefile + "/logs/kafkaServer.out", 'r') as fin:
        logging.info(fin.read())
    return False