Code example #1
    def run(self):
        self._draw_text('Ready!', 36)

        # Help
        if len(sys.argv) != 1:
            print("Usage: kafkaConsumer.py")
            exit(-1)

        # Initialize variables
        b_cmd = "stampsnet.hashtagsource.com"
        t_cmd = "erlun"
        b_photo = "stampsnet.hashtagsource.com"
        t_photo = "erlunPhoto"
        
        # Connect to the Kafka server (consumer)
        consumer = KafkaConsumer(bootstrap_servers=b_cmd)

        # Subscribe to the topic
        consumer.subscribe([t_cmd])

        # Connect to the Kafka server (producer)
        kafkaServer = KafkaClient(b_photo)
        producer = SimpleProducer(kafkaServer)


        # Print only the new messages
        for message in consumer:


            # Open the camera window
            # if message.value == "T":     
            #    p = subprocess.Popen(["camAndroid.py"], shell = True)


            # Close the camera window
            # if message.value == "F":     
            #    subprocess.call(['taskkill', '/F', '/T', '/PID', str(p.pid)])


            # Take a photo and send it to Kafka, topic erlunPhoto
            if message.value == "P":
                print (message.value)
                # Use urllib to get the image from the Android IP Webcam App
                imgResp = urllib.urlopen(url)  
                # Use numpy to convert the bytes into an array
                imgNp = np.array(bytearray(imgResp.read()),dtype=np.uint8)   
                # Finally decode the array into an OpenCV-usable format
                img = cv2.imdecode(imgNp,-1)
                # Look into later: MessageSizeTooLargeError
                cv2.imwrite("imgTemp.jpg",img)
                img = open("imgTemp.jpg", "rb").read()
                os.remove("imgTemp.jpg")
                # Convert to base64
                imgBase64 = base64.b64encode(img)
                # Send the message to Kafka
                producer.send_messages(t_photo, imgBase64)

            if message.value == "F":
                self._do_countdown()
                self._draw_text('Cheese!', 36)
                self._filename = 'raw-{0}.png'.format(time.strftime("%Y%m%d-%H%M%S"))
                self._take_picture()
                self._draw_text('Please wait...', 24)
                self._show_picture()
                img = open(self._filename, "rb").read()
                imgBase64 = base64.b64encode(img)
                producer.send_messages(t_photo, imgBase64)

            # Other commands
            if message.value == "U":
                print ("U - Up")
                forward()
            if message.value == "D":
                print ("D - Down")
                back()
            if message.value == "L":
                print ("L - Left")
                left()
            if message.value == "R":
                print ("R - Right")
                right()
            # Speed commands: "0.0" stops, "1.0" through "9.0" set the speed
            if message.value == "0.0":
                print ("Speed 0.0")
                stop()
            if message.value in ("1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0"):
                print ("Speed %s" % message.value)
                set_speed(int(float(message.value)))

            if message.value == "q": 
                stop()
                break

        cap.release()
        cv2.destroyAllWindows()
Code example #2
File: test.py Project: yujiye/Codes
def init_kafka():
    global kafkaProducer
    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
Code example #3
File: pipelines.py Project: xinyuekun/scrapy-cluster
 def from_settings(cls, settings):
     kafka = KafkaClient(settings['KAFKA_HOSTS'])
     producer = SimpleProducer(kafka)
     topic_prefix = settings['KAFKA_TOPIC_PREFIX']
     return cls(producer, topic_prefix, kafka)
Code example #4
# Kafka producer that reads the input data in a loop in order to simulate real time events
import os
import sys
from kafka import KafkaClient, KeyedProducer, SimpleConsumer
from datetime import datetime
import time
import fileinput
kafka = KafkaClient("54.67.107.239:6667")
source_file = '/home/ubuntu/anomalies.txt'


def genData(topic):
    producer = KeyedProducer(kafka)
    while True:

        for line in fileinput.input(source_file):
            key = line.split("\t")[0]
            print key
            print line.rstrip()
            producer.send(topic, key, line.rstrip())
            time.sleep(0.1)  # Small delay between messages to simulate real-time arrival
        fileinput.close()


genData("Anomaly")
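
The example imports SimpleConsumer but never uses it; a minimal consumer for the "Anomaly" topic fed by genData() might look like the sketch below (hypothetical; the group name "anomaly-group" is an assumption):

# Hypothetical consumer, not part of the original example
consumer = SimpleConsumer(kafka, "anomaly-group", "Anomaly")
for msg in consumer:
    # msg is an OffsetAndMessage; key/value come from producer.send(topic, key, line)
    print msg.message.key, msg.message.value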
Code example #5
import os
from kafka import KafkaClient, SimpleConsumer
from datetime import datetime

kafka = KafkaClient("localhost:9092")


class HDFS_Consumer(object):
    '''
    This class is used to receive messages from Kafka and save them to HDFS.
    Messages are first saved to a temporary file on the local machine, then
    transferred to HDFS atomically. Files are saved to a folder serving as the
    streaming source for Structured Streaming.
    '''
    def __init__(self, hdfs_directory, max_count):
        '''
        hdfs_directory is the folder where data is saved on hdfs.
        '''
        self.hdfs_dir = hdfs_directory
        self.count = 0
        self.max_count = max_count

    def getTimestamp(self):
        return datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    def consume_topic(self, topic, group, temp_dir):
        '''
        This function receives messages from Kafka, saves them to a temporary
        file first, then transfers the file to HDFS.
        '''
        # Create a Kafka receiver to grab messages
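        # --- Hypothetical continuation, not part of the original snippet ---
        # A minimal sketch of the flow described in the docstring: pull messages
        # with the old kafka-python SimpleConsumer, write them to a local temp
        # file, then move the file into the HDFS folder in a single step (via
        # "hdfs dfs -moveFromLocal") so the streaming job never reads a partially
        # written file. The file naming and the moveFromLocal call are assumptions.
        consumer = SimpleConsumer(kafka, group, topic)
        temp_path = os.path.join(temp_dir, "batch_%s.txt" % self.getTimestamp())
        with open(temp_path, "w") as f:
            for message in consumer:
                f.write(message.message.value + "\n")
                self.count += 1
                if self.count >= self.max_count:
                    break
        os.system("hdfs dfs -moveFromLocal %s %s" % (temp_path, self.hdfs_dir))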
Code example #6
            self._logger.info("%s" % messag)
            return False
        else:
            self._callback(self._partno, chg)
        return True


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    workers = {}
    brokers = "localhost:9092,localhost:9093,localhost:9094"
    group = "workers"

    kafka = KafkaClient(brokers, str(os.getpid()))
    cons = SimpleConsumer(kafka, group, "ctrl")
    cons.provide_partition_info()
    print "Starting control"
    end_ready = False
    while end_ready == False:
        try:
            while True:
                part, mmm = cons.get_message(timeout=None)
                mm = mmm.message
                print "Consumed ctrl " + str(mm)
                if mm.value == "start":
                    if workers.has_key(mm.key):
                        print "Dup partition %s" % mm.key
                        raise ValueError
                    else:
Code example #7
 def open(self):
     self.kafka_client = KafkaClient(self.broker_list, timeout=59)
     self.kafka_producer = SimpleProducer(self.kafka_client,
                                          batch_send=True,
                                          batch_send_every_n=500,
                                          batch_send_every_t=30)
Code example #8
File: views.py Project: moonyouj889/cereal_humidity
def get_kafka_client():
    if not hasattr(flask.g, "kafka_client"):
        flask.g.kafka_client = KafkaClient(HOSTS)
    return flask.g.kafka_client
Code example #9
# pip install kafka-python
import datetime
import time
# from kafka import KafkaConsumer
from kafka import SimpleProducer, KafkaClient
import json
producerServer='192.169.34.63:9092'
producerTopic='AdServe'

consumerServer='172.29.65.53:9092'
consumerTopic='AdServe'
consumerGroup='AdServe-Streamer'

# Kafka Producer Config
kafkaProducerClient=KafkaClient(producerServer)
producer=SimpleProducer(kafkaProducerClient, async=True, req_acks=SimpleProducer.ACK_NOT_REQUIRED)

class AdClickLog(object):
    imprId="asdadasdasda"	
    clmbUserId="testuser"
    adSltDimId="145456"
    auds="au,er"
    itemid="2000116"
    algo=87
    itmClmbLId=111148
    tmpltId=321
    refUrl="google.com"
    geoDimId="458"
    clickBid=3.2
    ip="192.168.33.192"
    section="0"
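
The snippet ends with the AdClickLog class definition; a hypothetical example of publishing one such record as JSON with the producer configured above (the field selection is illustrative only):

# Hypothetical usage, not part of the original snippet
record = {"imprId": AdClickLog.imprId, "clmbUserId": AdClickLog.clmbUserId,
          "itemid": AdClickLog.itemid, "clickBid": AdClickLog.clickBid}
producer.send_messages(producerTopic, json.dumps(record))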
Code example #10
#!/usr/bin/env python

import json
import os
from kafka import SimpleProducer, KafkaClient

# Set up Kafka
kafka = KafkaClient("ec2-52-8-111-39.us-west-1.compute.amazonaws.com:9092")
producer = SimpleProducer(kafka)

# reading user records from files and sending login names and IDs as messages to Kafka
count = 0
with open("final_usernames_file", "w") as op_file:
    files = os.listdir("../../data/users_data")
    for i, filename in enumerate(files):
        if filename.startswith("users"):
            with open("../../data/users_data/" + filename, "r") as ip_file:
                try:
                    print "\nfrom file: ", filename
                    json_records = json.load(ip_file)
                    for user in json_records:
                        user_dict = {}
                        user_dict["login"] = user["login"]
                        user_dict['id'] = user["id"]
                        producer.send_messages("github-usernames-good-1",
                                               json.dumps(user_dict))
                        op_file.write(json.dumps(user_dict))
                        print "count: ", count, "id: ", user_dict["id"]
                        count += 1
                except:
                    print "Some error in file: ", filename
Code example #11
        #tweet = json.loads(data)
        producer.send_messages("Brands", data.encode('utf-8'))
        print(data)
        return True
    def on_error(self, status):
        print (status)


ipfile = open('ip_addresses.txt', 'r')
ips = ipfile.read()[:-1]
ipfile.close()
ips = ips.split(',')

# IP addresses are loaded from an external file
kafka = KafkaClient('YOUR_IP_ADDRESS:PORT')

producer = SimpleProducer(kafka)

l = StdOutListener()

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

stream = Stream(auth, l)

#Array should be loaded from an external file
search_term_arr = ['Pepsi', 'Coke', 'Nike', 'Apple', 'Samsung']

stream.filter(languages=["en"], track=search_term_arr)
Code example #12
        print broker_info
        broker_ip = json.loads(broker_info[0])['host']
        broker_port = json.loads(broker_info[0])['port']
        bootstrap_servers_list.append(str(broker_ip) + ':' + str(broker_port))

except:
    print "zookeeper not found , using default "
    bootstrap_servers_list = ['192.168.150.80:9092']

print "kafka bootstrap servers ", bootstrap_servers_list
producer = KafkaProducer(bootstrap_servers=bootstrap_servers_list)

topic_name = 'my-topic3'

#check if topic exists
kafka_client = KafkaClient(bootstrap_servers_list)
server_topics = kafka_client.topic_partitions

if topic_name in server_topics:
    print "topic exists"
else:
    print "create topic "
    print "/opt/kafka/bin/kafka-topics.sh --create --zookeeper  192.168.150.70:2181  --replication-factor 2 --partitions 2 --topic " + topic_name
    # os.system ("opt/kafka/bin/kafka-topics.sh --create --zookeeper  192.168.150.70:2181  --replication-factor 2 --partitions 2 --topic " + topic_name )

print server_topics

list_of_messages = [
    (topic_name, None, None, '----------------------------------------'),
    (topic_name, None, None, 'msg 1 ---- partition - None / key - None'),
    (topic_name, None, None, 'msg 2 ---- partition - None / key - None'),
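
The message list is truncated here; assuming the tuples are (topic, partition, key, payload), as the message text itself suggests, a hypothetical send loop with the KafkaProducer created above could look like this:

# Hypothetical send loop, not from the original snippet
for t, partition, key, payload in list_of_messages:
    producer.send(t,
                  value=payload.encode('utf-8'),
                  key=key.encode('utf-8') if key else None,
                  partition=partition)
producer.flush()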
Code example #13
import json,configparser


class GeoTweetListener(StreamListener):

  def on_data(self, data):
    tweet=json.loads(data)
    try:
      if tweet["place"]:
        producer.send_messages('geoBasedTweets', data.encode("utf-8"))
        #print(data)
    except KeyError as msg:
      print(msg)

    return True

  def on_error(self, status):
    print(status)
    return True

if __name__ == '__main__':
  config = configparser.ConfigParser()
  config.read('config.ini')
  kafka_client = KafkaClient("sandbox-hdp.hortonworks.com:6667")  
  producer = SimpleProducer(kafka_client)
  l = GeoTweetListener()
  auth = OAuthHandler(config['TwitterAPI']['key'], config['TwitterAPI']['secret'])
  auth.set_access_token(config['TwitterAPI']['token'], config['TwitterAPI']['token_secret'])
  stream = Stream(auth, l)
  stream.filter(track=['#corona','#coronavirus','#covid','#StayAtHome','#stayhome', '#CoronaLockdown', '#covid19','#covid2019'],filter_level=None,languages=["en"])
Code example #14
File: kr36.py Project: yujiye/Codes
import config
import loghelper
import my_request
import util

#logger
loghelper.init_logger("kr36_spider", stream=True)
logger = loghelper.get_logger("kr36_spider")

#mongo
(mongodb_host, mongodb_port) = config.get_mongodb_config()
mongo = MongoClient(mongodb_host, mongodb_port)

#kafka
(kafka_url) = config.get_kafka_config()
kafka = KafkaClient(kafka_url)
# HashedPartitioner is default
kafka_producer = SimpleProducer(kafka)

#
company_collection = mongo.crawler_v2.company
company_collection.create_index([("source", pymongo.DESCENDING), ("company_key", pymongo.DESCENDING)], unique=True)
member_collection = mongo.crawler_v2.member
member_collection.create_index([("source", pymongo.DESCENDING), ("member_key", pymongo.DESCENDING)], unique=True)
news_collection = mongo.crawler_v2.news
news_collection.create_index([("source", pymongo.DESCENDING),("company_key", pymongo.DESCENDING),("news_key", pymongo.DESCENDING)], unique=True)
investor_collection = mongo.crawler_v2.investor
investor_collection.create_index([("source", pymongo.DESCENDING), ("investor_key", pymongo.DESCENDING)], unique=True)

#
source = 13020
Code example #15
import time
import cv2
from kafka import SimpleProducer, KafkaClient
kafka = KafkaClient('localhost:9092')
producer = SimpleProducer(kafka)
# Assign a topic
topic = 'streams-input'


def video_emitter():
    # Open the video
    video = cv2.VideoCapture(0)
    print(' emitting.....')

    t0 = time.time()

    # read the file
    while video.isOpened():
        # read the image in each frame
        success, image = video.read()
        # stop when there are no more frames to read
        if not success:
            break
        # encode the frame as PNG
        ret, jpeg = cv2.imencode('.png', image)
        # Convert the image to bytes and send to kafka
        t1 = time.time()
        future = producer.send_messages(topic, jpeg.tobytes())

        # To reduce CPU usage, add a short sleep between frames
        #time.sleep(0.10)
Code example #16
	def initialize(self, stormconf, context):
		# self.words = itertools.cycle(['dog', 'cat',
		# 								'zebra', 'elephant'])
		self.kafka = KafkaClient("cloud.soumet.com:9092")
		self.consumer = SimpleConsumer(self.kafka, "storm", "realtime", max_buffer_size=1310720000)
Code example #17
from kafka import SimpleProducer, KafkaClient
import avro.schema
import io, random
from avro.io import DatumWriter
import cProfile

# Kafka settings
kafkaHost = "localhost:9092"
kafkaTopic = "avro"

kafka = KafkaClient(kafkaHost)
producer = SimpleProducer(kafka)

schema_path = "../schemas/user.avsc"
schema = avro.schema.parse(open(schema_path).read())

writer = avro.io.DatumWriter(schema)
buffer = io.BytesIO()
encoder = avro.io.BinaryEncoder(buffer)

datum = {
    "created_at": "2015-10-21T09:47:50-04:00",
    "name": "123",
    "favorite_color": "111",
    "favorite_number": random.randint(0, 10)
}
writer.write(datum, encoder)

raw_bytes = buffer.getvalue()

producer.send_messages(kafkaTopic, raw_bytes)
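
For reference, the matching consumer side would decode these raw bytes with the same user.avsc schema; a hypothetical sketch using avro's DatumReader and BinaryDecoder (the consumer group name "avro-consumers" is an assumption):

# Hypothetical consumer-side decoding, not part of the original example
from kafka import SimpleConsumer

consumer = SimpleConsumer(kafka, "avro-consumers", kafkaTopic)
reader = avro.io.DatumReader(schema)
for raw in consumer:
    decoder = avro.io.BinaryDecoder(io.BytesIO(raw.message.value))
    record = reader.read(decoder)
    print("decoded: %s" % record)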
Code example #18
from kafka import SimpleProducer, KafkaClient
import json

sample = {'imageMatch': True, 'fileName': 'M.Ravichandran.1.jpg'}

kafka = KafkaClient('10.6.4.36:9092')
producer = SimpleProducer(kafka)
data = json.dumps(sample)
producer.send_messages(b'003', data)
Code example #19
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.error("New KafkaClient %d" % self._partition)
                self._kfk = KafkaClient(self._brokers, str(os.getpid()))
                try:
                    consumer = SimpleConsumer(self._kfk,
                                              self._group,
                                              self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.error("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0, 2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(0, 1)
                else:
                    consumer.seek(0, 0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        self.resource_check()
                        mlist = consumer.get_messages(10, timeout=0.2)
                        for mm in mlist:
                            if mm is None:
                                continue
                            self._logger.debug("%d Reading offset %d" % \
                                    (self._partition, mm.offset))
                            consumer.commit()
                            pcount += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not handle %s" %
                                                  (self._partition, str(mm)))
                                raise gevent.GreenletExit
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" % str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)

        partdb = {}
        for coll in self._uvedb.keys():
            partdb[coll] = {}
            for gen in self._uvedb[coll].keys():
                partdb[coll][gen] = {}
                for tab in self._uvedb[coll][gen].keys():
                    for rkey in self._uvedb[coll][gen][tab].keys():
                        uk = tab + ":" + rkey
                        partdb[coll][gen][uk] = \
                            set(self._uvedb[coll][gen][tab][rkey].keys())

        self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
        self.stop_partition()
        return self._partoffset, partdb
Code example #20
 def __init__(self):
     self._brokers = APP_CONFIG["rti_kafka"]["brokers"]
     self._partitions = APP_CONFIG["rti_kafka"]["partitions"]
     self._topic = APP_CONFIG["rti_kafka"]["topic"]
     self._kafka = KafkaClient(self._brokers)
     self.producer = None
Code example #21
db = mongo_client["spark"]
collection = db["transactionData"]

topic_name = "creditcard"
topic_list = [
    NewTopic(
        name=topic_name,
        num_partitions=1,
        replication_factor=1,
        topic_configs={'retention.ms': '300000'}
    )
]

# Retrieve the list of existing topics, then delete the topic if it already exists

client = KafkaClient(bootstrap_servers=['localhost:9092'])
metadata = client.cluster
future = client.cluster.request_update()
client.poll(future=future)
broker_topics = metadata.topics()

admin_client = KafkaAdminClient(bootstrap_servers=['localhost:9092'])
if topic_name in broker_topics:
    deletion = admin_client.delete_topics([topic_name])
    sleep(2)
    try:
        future = client.cluster.request_update()
        client.poll(future=future)
    except KafkaError as e:
        print(e)
        pass
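
The snippet breaks off before the topic is recreated; a hypothetical continuation using the KafkaAdminClient and the topic_list defined above:

# Hypothetical: recreate the topic after the deletion above (not from the original snippet)
try:
    admin_client.create_topics(new_topics=topic_list, validate_only=False)
except KafkaError as e:
    print(e)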
Code example #22
                    datetime.utcfromtimestamp(last_ts))
    else:
        # Consider RIBs dumps only
        now = time.time()
        last_ts = int(now - now % 3600)
        logger.info("loading from: %s", datetime.utcfromtimestamp(last_ts))

    stream.add_filter('record-type', 'ribs')
    stream.add_filter('record-type', 'updates')

    stream.add_interval_filter(last_ts, 0)

    # Start the stream
    stream.start()

    client = KafkaClient(args.our_servers.split(","))
    count = 0
    for batch in group_by_n(
            messages_from_internal(iterate_stream(stream, args.collector)),
            1000):
        req = ProduceRequest("rib-{}".format(args.collector), 0, batch)
        for msg in reversed(req.messages):
            if msg.value is None:
                continue
            last_timestamp = json.loads(msg.value)["timestamp"]
            break

        count += len(batch)
        logger.info("sending %i", count)
        res = client.send_produce_request([req])
        try:
Code example #23
from kafka import SimpleProducer, KafkaClient
import avro.schema
import io
import random
from avro.io import DatumWriter

# To send messages synchronously
kafka = KafkaClient('toti-2:9092')
producer = SimpleProducer(kafka)

# Kafka topic
topic = "monitoring"

# Path to user.avsc avro schema
schema_path = "user.avsc"
schema = avro.schema.parse(open(schema_path).read())


for i in xrange(10):
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write({"hostname": "totino-1", "check": "memory",
                  "metric": random.randint(0, 10)}, encoder)
    raw_bytes = bytes_writer.getvalue()
    producer.send_messages(topic, raw_bytes)
Code example #24
 def create_kafka_client(self):
     logging.info("Creating kafka client (thread: %s)", threading.current_thread().getName())
     return KafkaClient(self.kafka_brokers)
Code example #25
from kafka import SimpleConsumer, SimpleClient
from kafka import KafkaConsumer
from kafka import KafkaClient

group_name = "my-group"
topic_name = "fast-messages"

kafka = KafkaClient('kafka1:9092')
consumer = SimpleConsumer(kafka, group_name, topic_name)

print("Created consumer for group:" + group_name + " and topic: " +
      topic_name + "")
print("Waiting for messages...")

for msg in consumer:
    print(msg)
Code example #26
keyword = "COVID"
portno  = "9092"

access_token        = ""
access_token_secret = "" 
consumer_key        = "" 
consumer_secret     = ""

class StdOutListener(StreamListener):
    def on_data(self, data):
        producer.send_messages(keyword, data.encode('utf-8'))
        print (data)
        return True
    def on_error(self, status):
        print (status)

kafka    = KafkaClient("localhost:"+portno)
producer = SimpleProducer(kafka)
l        = StdOutListener()


auth     = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)


stream   = Stream(auth, l)



stream.filter(track=keyword)
Code example #27
from kafka import SimpleProducer, KafkaClient
import socket

s = socket.socket()  # Create a socket object
host = socket.gethostname()  # Get local machine name
port = 9999  # Reserve a port for your service.
s.bind(("127.0.0.1", port))  # Bind to the port

print("Listening for Drill Sim")
s.listen(1)
c, addr = s.accept()  # Now wait for client connection.
print("Drill Sim Connected")
message = ""

print("Connecting to Kafka")
kafka = KafkaClient('127.0.0.1:9998')
producer = SimpleProducer(kafka)
print("Kafka Connected")

data = []

while True:
    char = c.recv(1)
    message += char
    if char == "\n":
        split_message = message.strip("\r\n").split(',')
        if len(split_message) == 3 and split_message[0] != '10':
            data.append(split_message[1])
        elif len(split_message) == 3 and split_message[0] == '10':
            data.append(split_message[1])
            data.append(split_message[2])
Code example #28
File: torfka.py Project: mydrone/Torfka
 def produce(self, topic, message):
     #  Tor messes with the kafka stream. Reset changes.
     self.reset()
     kafka = KafkaClient('%s:9092' % self.hostIP)
     producer = SimpleProducer(kafka)
     producer.send_messages(topic, message)
Code example #29
 def _kafka_client(self):
     kafka_client = KafkaClient(self.config.cluster_config.broker_list)
     try:
         yield kafka_client
     finally:
         kafka_client.close()
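
Since the method yields the client and closes it in a finally block, it is presumably wrapped with contextlib.contextmanager; a hypothetical usage sketch (obj stands in for an instance of the surrounding class):

# Hypothetical usage, assuming _kafka_client is decorated with @contextlib.contextmanager
with obj._kafka_client() as client:
    print(client.topic_partitions)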
Code example #30
File: kafkadump.py Project: sihai90/scrapy-dynamic
def main():
    # initial main parser setup
    parser = argparse.ArgumentParser(
        description='Kafka Dump: Scrapy Cluster Kafka topic dump utility for '
        'debugging.',
        add_help=False)
    parser.add_argument('-h',
                        '--help',
                        action=ArgparseHelper,
                        help='show this help message and exit')

    subparsers = parser.add_subparsers(help='commands', dest='command')

    # args to use for all commands
    base_parser = argparse.ArgumentParser(add_help=False)
    base_parser.add_argument('-kh',
                             '--kafka-host',
                             action='store',
                             required=False,
                             help="The override Kafka host")
    base_parser.add_argument('-s',
                             '--settings',
                             action='store',
                             required=False,
                             help="The settings file to read from",
                             default="localsettings.py")
    base_parser.add_argument(
        '-ll',
        '--log-level',
        action='store',
        required=False,
        help="The log level",
        default=None,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])

    # list command
    list_parser = subparsers.add_parser('list',
                                        help='List all Kafka topics',
                                        parents=[base_parser])

    # dump command
    dump_parser = subparsers.add_parser('dump',
                                        help='Dump a Kafka topic',
                                        parents=[base_parser])
    dump_parser.add_argument('-t',
                             '--topic',
                             action='store',
                             required=True,
                             help="The Kafka topic to read from")
    dump_parser.add_argument('-c',
                             '--consumer',
                             action='store',
                             required=False,
                             default=None,
                             help="The Kafka consumer id to use")
    dump_parser.add_argument('-b',
                             '--from-beginning',
                             action='store_const',
                             required=False,
                             const=True,
                             help="Read the topic from the beginning")
    dump_parser.add_argument('-nb',
                             '--no-body',
                             action='store_const',
                             required=False,
                             const=True,
                             default=False,
                             help="Do not include the raw html 'body' key in"
                             " the json dump of the topic")
    dump_parser.add_argument('-p',
                             '--pretty',
                             action='store_const',
                             required=False,
                             const=True,
                             default=False,
                             help="Pretty print the json objects consumed")
    dump_parser.add_argument('-d',
                             '--decode-base64',
                             action='store_const',
                             required=False,
                             const=True,
                             default=False,
                             help="Decode the base64 encoded raw html body")

    args = vars(parser.parse_args())

    wrapper = SettingsWrapper()
    settings = wrapper.load(args['settings'])

    kafka_host = args['kafka_host'] if args['kafka_host'] else settings[
        'KAFKA_HOSTS']
    log_level = args['log_level'] if args['log_level'] else settings[
        'LOG_LEVEL']
    logger = LogFactory.get_instance(level=log_level, name='kafkadump')

    if args['command'] == 'list':
        try:
            logger.debug("Connecting to {0}...".format(kafka_host))
            kafka = KafkaClient(kafka_host)
            logger.info("Connected to {0}".format(kafka_host))
        except KafkaUnavailableError as ex:
            message = "An exception '{0}' occurred. Arguments:\n{1!r}" \
                .format(type(ex).__name__, ex.args)
            logger.error(message)
            sys.exit(1)
        logger.debug('Running list command')
        print("Topics:")
        for topic in list(kafka.topic_partitions.keys()):
            print("-", topic)
        kafka.close()
        return 0
    elif args['command'] == 'dump':
        logger.debug('Running dump command')
        topic = args["topic"]
        consumer_id = args["consumer"]

        try:
            logger.debug("Getting Kafka consumer")

            offset = 'earliest' if args["from_beginning"] else 'latest'

            consumer = KafkaConsumer(  # consume messages from the demo.crawled_firehose topic
                topic,
                group_id=consumer_id,
                bootstrap_servers=kafka_host,
                consumer_timeout_ms=settings['KAFKA_CONSUMER_TIMEOUT'],
                auto_offset_reset=offset,
                auto_commit_interval_ms=settings[
                    'KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
                enable_auto_commit=settings[
                    'KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
                max_partition_fetch_bytes=settings[
                    'KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
        except NoBrokersAvailable as ex:
            logger.error('Unable to connect to Kafka')
            sys.exit(1)

        num_records = 0
        total_bytes = 0
        item = None

        while True:
            try:
                for message in consumer:
                    if message is None:
                        logger.debug("no message")
                        break
                    logger.debug("Received message")
                    val = message.value
                    try:
                        item = json.loads(val)
                        if args['decode_base64'] and 'body' in item:
                            item['body'] = base64.b64decode(item['body'])

                        if args['no_body'] and 'body' in item:
                            del item['body']
                    except ValueError:
                        logger.info("Message is not a JSON object")
                        item = val
                    body_bytes = len(item)

                    if args['pretty']:
                        print(json.dumps(item, indent=4))
                    else:
                        print(item)
                    num_records = num_records + 1
                    total_bytes = total_bytes + body_bytes
            except KeyboardInterrupt:
                logger.debug("Keyboard interrupt received")
                break
            except:
                logger.error(traceback.format_exc())
                break

        total_mbs = old_div(float(total_bytes), (1024 * 1024))
        if item is not None:
            print("Last item:")
            print(json.dumps(item, indent=4))
        if num_records > 0:
            logger.info(
                "Num Records: {n}, Total MBs: {m}, kb per message: {kb}".
                format(n=num_records,
                       m=total_mbs,
                       kb=(float(total_bytes) / num_records / 1024)))
        else:
            logger.info("No records consumed")
            num_records = 0

        logger.info("Closing Kafka connection")
        try:
            consumer.close()
        except:
            # Exception is thrown when group_id is None.
            # See https://github.com/dpkp/kafka-python/issues/619
            pass
        return 0