Example #1
        def consume_topic(callback_url, consumer_group, topic):
            consumer = None
            try:
                consumer = SimpleConsumer(self.kafka,
                                          consumer_group,
                                          topic,
                                          auto_commit=False)
                messages_read = 0

                # we can't read messages infinitely here as we have
                # a lot of topics/subscribers (much more than threadpool size)
                while messages_read < self.max_read_messages_per_cycle:

                    # get one message and monitor the time
                    start = monitoring.start_time_measure()
                    message = consumer.get_message(block=False)
                    ms_elapsed = monitoring.stop_time_measure(start)
                    self.metrics['kafka_read'].add({'topic': topic},
                                                   ms_elapsed)

                    # if we don't have messages for this topic/subscriber - quit and give chance to others
                    if message is None:
                        logging.info(
                            'No messages for topic: %s and callback: %s, quitting the thread',
                            topic, callback_url)
                        break

                    try:
                        event = json.loads(
                            message.message.value.decode('utf-8'))
                        response_status = self.forward_event(
                            callback_url, event, topic)

                        # if status is success - mark message as consumed by this subscriber
                        if 200 <= response_status < 300:
                            consumer.commit()
                        else:
                            logging.info(
                                'Received error response from consumer: %s',
                                response_status)
                    except Exception:
                        logging.error(
                            "Exception while sending event to consumer")
                        logging.error(traceback.format_exc())
                    finally:
                        messages_read += 1
                return messages_read

            except UnknownTopicOrPartitionError:
                logging.error('Adding %s to skip list', topic)
            except Exception:
                logging.exception('failed to create kafka client')
            finally:
                if consumer is not None:
                    consumer.stop()
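
For reference, the bounded-read pattern above can be reproduced standalone against the same legacy kafka-python SimpleConsumer API. A minimal sketch, assuming a broker at localhost:9092 and a topic named 'events' (both hypothetical):

from kafka import KafkaClient, SimpleConsumer

client = KafkaClient('localhost:9092')          # hypothetical broker address
consumer = SimpleConsumer(client, 'my-group', 'events', auto_commit=False)

messages_read = 0
while messages_read < 100:                      # bounded read, as in the example
    message = consumer.get_message(block=False)
    if message is None:                         # topic drained for now
        break
    # process message.message.value here, then mark it consumed
    consumer.commit()
    messages_read += 1

consumer.stop()
client.close()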
Example #2
class HBaseServer(threading.Thread):
    """
    HBase thread that will continuously read from Kafka queue
    """
    def __init__(self, kafka_url, kafka_topic, hbase_url, hbase_thrift_port,
                 hbase_table):
        threading.Thread.__init__(self)

        self.kafka = KafkaClient(kafka_url)
        self.cons = SimpleConsumer(self.kafka, None, kafka_topic)
        self.cons.seek(0, 2)

        self.hbase_connect = happybase.Connection(hbase_url, hbase_thrift_port)
        self.car_table = self.hbase_connect.table(hbase_table)

        self.server_on_flag = True
        self.m = None
        self.payload = None
        self.vin = None
        self.time = None
        self.data = None
        self.row_key = None
        self.count = 0

    def run(self):
        while self.server_on_flag:

            self.m = self.cons.get_message(block=False)

            if (self.m is not None):
                self.payload = json.loads(self.m.message.value)
                self.vin = str(self.payload['vin'])
                self.time = str(self.payload['timestamp'])
                self.data = str(self.payload['data'])

                self.row_key = self.vin + self.time
                try:
                    self.car_table.put(self.vin,
                                       {'user:mostrecent': self.time})
                    self.car_table.put(self.row_key, {'car:data': self.data})
                    self.count = self.count + 1
                    logger.info(
                        'HBase Server: key: %s, table: %s, car{data: %s}. Message number: %s',
                        self.row_key, 'rvi', self.data, str(self.count))

                except Exception as e:
                    logger.error('Data push into HBase unsuccessful: %s', e)

            else:
                sleep(0.2)  # note: 1 / 5 is 0 under Python 2 integer division

    def shutdown(self):
        self.server_on_flag = False
        logger.info('HBase Server shutting down...')
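
A usage sketch for the thread above; the broker address, HBase Thrift port (happybase's default is 9090), and table name are placeholders, not values from the original project:

server = HBaseServer('localhost:9092', 'rvi', 'localhost', 9090, 'rvi')
server.start()     # begins draining the topic in the background
# ... do other work ...
server.shutdown()  # flips server_on_flag; run() exits on its next pass
server.join()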
Example #4
class KafkaTopicQueue:
    def __init__(self, topic, host="localhost:9092"):
        self.topic = topic
        self.group = "group-for-%s" % (self.topic)
        self.kafka = SimpleClient(host)
        self.producer = SimpleProducer(self.kafka)
        self.consumer = SimpleConsumer(self.kafka, self.group, self.topic)

    def push(self, v):
        self.producer.send_messages(self.topic, v)

    def pop(self):
        item = self.consumer.get_message()
        return item.message.value if item else None
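
A quick usage sketch; recent kafka-python releases expect byte-string payloads, so byte literals are used here (an assumption, not taken from the original):

queue = KafkaTopicQueue('jobs')   # assumes a broker on localhost:9092
queue.push(b'job-1')
queue.push(b'job-2')

print(queue.pop())                # b'job-1' (FIFO within a partition)
print(queue.pop())                # b'job-2'
print(queue.pop())                # None once the topic is drained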
Example #6
def kafka_consumer(kafka_hosts,
                   schema_host,
                   schema_port,
                   topic,
                   consumer_group="python"):
    """
    消费kafka对应topic的记录, 非实时消费
    :param kafka_hosts:
    :param schema_host:
    :param schema_port:
    :param topic:
    :param consumer_group:
    :return:
    """
    # fetch the latest schema for the topic
    topic_schema, topic_schema_id, schema_version = get_latest_schema_info(
        schema_host, schema_port, topic)
    # consume records from Kafka
    client = KafkaClient(hosts=kafka_hosts)
    simple_consumer = SimpleConsumer(client,
                                     consumer_group,
                                     topic,
                                     auto_offset_reset="smallest")
    collect_logs = []  # holds each message's partition, offset and value
    msg_exist = True
    while msg_exist:
        msg = simple_consumer.get_message(get_partition_info=True)
        # print "kafka log:", msg
        # get_message returns None when nothing was fetched; stop consuming then
        if msg is None:
            msg_exist = False
        else:
            msg_partition = msg[0]
            msg_offset = msg[1].offset
            msg_value = msg[1].message.value
            # decode a single record
            bytes_msg = io.BytesIO(msg_value[5:])
            decode_msg = avro.io.BinaryDecoder(bytes_msg)
            recode_msg = avro.io.DatumReader(
                avro.schema.parse(topic_schema)).read(decode_msg)
            # collect this record's partition, offset and value
            msg_collect = [msg_partition, msg_offset, recode_msg]
            collect_logs.append(msg_collect)
    collect_logs.sort(key=lambda x: x[0])  # sort by partition id
    print "+++++++Topic: %s+++++++" % topic
    for index, log in enumerate(collect_logs):
        print index, log
    print "Successfully received."
    return collect_logs
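
The msg_value[5:] slice above skips what appears to be the Confluent wire-format prefix: one zero magic byte followed by a 4-byte big-endian schema id. A sketch that unpacks that prefix explicitly before Avro decoding, assuming that framing:

import struct

def split_confluent_frame(raw_bytes):
    """Split a Confluent-framed Kafka value into (schema_id, avro_body)."""
    magic, schema_id = struct.unpack('>bI', raw_bytes[:5])
    if magic != 0:
        raise ValueError('not a Confluent wire-format message')
    return schema_id, raw_bytes[5:]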
Example #7
class RVIConsumer(threading.Thread):

    def __init__(self, kafka_addr, topic, vin, web_url):
        threading.Thread.__init__(self)

        self.kafka = KafkaClient(kafka_addr) #kafka_addr
        self.cons = SimpleConsumer(self.kafka, None, topic)
        self.cons.seek(0,2)

        self.vin = vin
        self.web_url = web_url 
        self.flag = True
        self.count = 0
        self.sleep_count = 0
        self.headers = {'Content-Type' : 'application/json'}

    def is_running(self):
        return self.flag
        
    def run(self):
        while self.flag:
            
            #cons = SimpleConsumer(kafka, None, 'rvi')
            m = self.cons.get_message(block=False)
            if (m is not None):
                payload = json.loads(m.message.value)

                if(payload['vin'] == self.vin):
                    self.sleep_count = 0 
                    payloadtoweb = json.dumps(m.message.value)
                    r = requests.post(self.web_url, data=payloadtoweb, headers=self.headers) 
                    if r.status_code == 200:
                        print m.message.value + " sent successfully\n"        
                    else: 
                        print "%s is not available, status code:%d...shutting down now..."%(self.web_url,r.status_code)
                        self.shutdown()       

            else:
                if (self.sleep_count > 100000):
                    print "No new data for %s... Timing out" % self.vin
                    self.shutdown()

                time.sleep(0.2)  # 1/5 is 0 under Python 2 integer division
                self.sleep_count = self.sleep_count + 1

    def shutdown(self):
        self.flag = False    
        requests.post(self.web_url, data=json.dumps({'vin':self.vin, 'data':'EOM'}), headers=self.headers) 
        print "%s consumer thread shutting down" % self.vin 
Example #8
def serve_user(user):
    consumer = SimpleConsumer(CLIENT, 'testing', 'user{}_sess{}'.format(user,user))
    msg = consumer.get_message()
    RECEIVE_TIME = time.time()
    color = 'yellow'

    S_R_LAG = RECEIVE_TIME - SEND_TIME if SEND_TIME else None
    
    if msg:
        print("received message: {} delay: {}".format(msg.message.value.decode(), S_R_LAG))
        if msg.message.value.decode() == 'True':
            color='green'
        else:
            color='red'
    return render_template('keylog.html', bgcolor=color)
Example #9
def main():
    """
    Usage:
        dump_to_mongodb dump <topic> --host=<host> [--consumer=<consumer>]
    """
    args = docopt(main.__doc__)
    host = args["--host"]

    print "=> Connecting to {0}...".format(host)
    logger.info("=> Connecting to {0}...".format(host))
    kafka = KafkaClient(host)
    print "=> Connected."
    logger.info("=> Connected.")
    if args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "dump_to_mongodb"
        consumer = SimpleConsumer(
            kafka,
            consumer_id,
            topic,
            buffer_size=1024 * 200,  # 200KB
            fetch_size_bytes=1024 * 200,  # 200KB
            max_buffer_size=None  # eliminate big-message errors
        )
        consumer.seek(0, 1)
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                logger.info("message.message.value== %s " % val)
                print('val==', val)
                try:
                    item = json.loads(val)
                except ValueError:  # skip values that are not valid JSON
                    continue
                if 'meta' in item and 'collection_name' in item['meta']:
                    _insert_item_to_monggodb(item)
            except Exception:
                traceback.print_exc()
                break
        kafka.close()
        return 0
Example #10
class FirehoseConsumer(object):
    def __init__(self, kafka_hostport, topic, group=None, **kwargs):
        if not group:
            group = str(uuid.uuid4())

        self.kafka = get_client(kafka_hostport)
        self.consumer = SimpleConsumer(self.kafka, group, topic,
            max_buffer_size=1048576 * 32, **kwargs)

    def get_event(self):
        data = self.consumer.get_message()
        if not data:
            return None

        when, event, delivery, signature, raw = json.loads(data.message.value)
        payload = json.loads(raw)

        return when, event, delivery, signature, payload
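
A usage sketch; get_client is this module's own helper (not shown), and the endpoint and topic below are placeholders:

firehose = FirehoseConsumer('localhost:9092', 'firehose')  # hypothetical values
event = firehose.get_event()       # None when the topic is drained
if event is not None:
    when, event_type, delivery, signature, payload = event
    print('%s %s' % (event_type, delivery))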
Example #13
    def dispatch(self):
        consumer = SimpleConsumer(
            self.kafka_client,
            self.consumer_id,
            self.topic,
            buffer_size=1024 * 100,  # 100kb
            fetch_size_bytes=1024 * 100,  # 100kb
            max_buffer_size=None  # eliminate big message errors
        )
        consumer.seek(0, 1)
        i = 0
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    print datetime.datetime.now().strftime(
                        "%Y-%m-%d %H:%M:%S"), ' message is None:'
                    logger.info('message is None.')
                    time.sleep(1)
                    continue
                val = message.message.value
                try:
                    item = json.loads(val)
                    i += 1
                    self._process_item(item, i % len(self.aria2_clients))
                except Exception:
                    print("error happened in loads val to process: %s" % val)
                    logger.error("error happened in loads val to process: %s" %
                                 val)
                    continue
            except Exception:
                traceback.print_exc()
                break

        self.kafka_client.close()
        return 0
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.info("New KafkaClient %d" % self._partition)
                kafka = KafkaClient(self._brokers ,str(os.getpid()))
                try:
                    consumer = SimpleConsumer(kafka, self._group, self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.info("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))
                self.start_partition()

                # start reading from last previously processed message
                consumer.seek(0,1)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mm = consumer.get_message(timeout=None)
                        if mm is None:
                            continue
                        self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                            raise gevent.GreenletExit
                    except TypeError:
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.info("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)
        self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
        return self._partoffset, self._partdb
    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
                self._failed = False
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
                    self._failed = True
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi is not None:
                    consumer.seek(-1,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10,timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        consumer.commit()
                        pcount += len(mlist) 
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                self._failed = True
                pause = True

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
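
Both _run variants above lean on SimpleConsumer.seek(offset, whence), whose whence argument follows file-seek conventions: 0 is absolute, 1 is relative to the current fetch position, 2 is relative to the partition tail. A compact reference sketch (consumer assumed to exist):

consumer.seek(0, 0)    # rewind to the head of the partition: replay everything
consumer.seek(0, 1)    # stay at the current fetch position: resume where we left off
consumer.seek(-1, 2)   # one before the tail: re-read only the last queued message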
Example #16
def main():
    """kafkadump: Kafka topic dump utility for debugging.

    Usage:
        kafkadump list --host=<host>
        kafkadump dump <topic> --host=<host> [--consumer=<consumer>]

    Examples:

        List all the topics on your local Kafka instance:

            python kafkadump.py list --host=<kafkahost>:9092

        Dump the contents of a single topic starting from offset 0:

            python kafkadump.py dump test.crawled_firehose --host=<kafkahost>:9092

        Use CTRL+C (SIGINT, KeyboardInterrupt) to stop it from polling Kafka.
        It will end by printing the total records serviced and the raw output
        of the most recent record.

    Options:
        -h --host <host>            Kafka host name where Kafka cluster will be resolved
        -c --consumer <consumer>    Consumer group ID to use for reading messages
    """
    args = docopt(main.__doc__)
    host = args["--host"]

    logging.basicConfig()

    print "=> Connecting to {0}...".format(host)
    kafka = KafkaClient(host)
    print "=> Connected."

    if args["list"]:
        for topic in kafka.topic_partitions.keys():
            print topic
        return 0
    elif args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "default"
        consumer = SimpleConsumer(kafka, consumer_id, topic,
                                  buffer_size=1024 * 100,  # 100kb
                                  fetch_size_bytes=1024 * 100,  # 100kb
                                  max_buffer_size=None  # eliminate big message errors
                                  )
        consumer.seek(0, 0)
        num_records = 0
        total_bytes = 0
        item = None
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                item = json.loads(val)
                body_bytes = len(item)
                print item
                num_records = num_records + 1
                total_bytes = total_bytes + body_bytes
            except Exception:
                traceback.print_exc()
                break
        total_mbs = float(total_bytes) / (1024 * 1024)
        print
        if item is not None:
            print json.dumps(item, indent=4)
        if num_records == 0:
            num_records = 1
        print num_records, "records", total_mbs, "megabytes", (float(total_bytes) / num_records / 1024), "kb per msg"
        kafka.close()
        return 0
Example #17
class KafkaSimpleConsumer(object):
    """ Base class for consuming from kafka.
    Implement the logic to connect to kafka and consume messages.
    KafkaSimpleConsumer is a wrapper around kafka-python SimpleConsumer.
    KafkaSimpleConsumer relies on it in order to consume messages from kafka.
    KafkaSimpleConsumer does not catch exceptions raised by kafka-python.

    An instance of this class can be used as iterator
    to consume messages from kafka.

    .. warning:: This class is considered deprecated in favor of
                 :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup`.

    :param topic: topic to consume from.
    :type topic: string.
    :param config: consumer configuration.
    :type config: dict.
    :param partitions: topic partitions to consume from.
    :type partitions: list.
    """
    def __init__(self, topic, config, partitions=None):
        self.log = logging.getLogger(self.__class__.__name__)
        if not isinstance(topic, six.string_types):
            raise TypeError("Topic must be a string")
        self.topic = kafka_bytestring(topic)
        if partitions and not isinstance(partitions, list):
            raise TypeError("Partitions must be a list")
        self.partitions = partitions
        self.kafka_consumer = None
        self.config = config

    def connect(self):
        """ Connect to kafka and create a consumer.
        It uses config parameters to create a kafka-python
        KafkaClient and SimpleConsumer.
        """
        # Instantiate a kafka client connected to kafka.
        self.client = KafkaClient(self.config.broker_list,
                                  client_id=self.config.client_id)

        # Create a kafka SimpleConsumer.
        self.kafka_consumer = SimpleConsumer(
            client=self.client,
            topic=self.topic,
            partitions=self.partitions,
            **self.config.get_simple_consumer_args())
        self.log.debug(
            "Connected to kafka. Topic %s, partitions %s, %s", self.topic,
            self.partitions, ','.join([
                '{0} {1}'.format(k, v) for k, v in six.iteritems(
                    self.config.get_simple_consumer_args())
            ]))
        self.kafka_consumer.provide_partition_info()

    def __iter__(self):
        for partition, kafka_message in self.kafka_consumer:
            yield Message(
                partition=partition,
                offset=kafka_message[0],
                key=kafka_message[1].key,
                value=kafka_message[1].value,
            )

    def __enter__(self):
        self.connect()
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        """Disconnect from kafka.
        If auto_commit is enabled commit offsets before disconnecting.
        """
        if self.kafka_consumer.auto_commit is True:
            try:
                self.commit()
            except Exception:
                self.log.exception("Commit error. "
                                   "Offsets may not have been committed")
        # Close all the connections to kafka brokers. KafkaClient open
        # connections to all the partition leaders.
        self.client.close()

    def get_message(self, block=True, timeout=0.1):
        """Get message from kafka. It supports the same arguments of get_message
        in kafka-python SimpleConsumer.

        :param block: If True, the API will block till at least a message is fetched.
        :type block: boolean
        :param timeout: If block is True, the function will block for the specified
                        time (in seconds).
                        If None, it will block forever.

        :returns: a Kafka message
        :rtype: Message namedtuple, which consists of: partition number,
                offset, key, and message value
        """
        fetched_message = self.kafka_consumer.get_message(block, timeout)
        if fetched_message is None:
            # get message timed out returns None
            return None
        else:
            partition, kafka_message = fetched_message
            return Message(
                partition=partition,
                offset=kafka_message[0],
                key=kafka_message[1].key,
                value=kafka_message[1].value,
            )

    def commit(self, partitions=None):
        """Commit offset for this consumer group
        :param partitions: list of partitions to commit, default commits to all
        partitions.
        :return: True on success, False on failure.
        """
        if partitions:
            return self.kafka_consumer.commit(partitions)
        else:
            return self.kafka_consumer.commit()

    def commit_message(self, message):
        """Commit the message offset for this consumer group. This function does not
        take care of the consumer offset tracking. It should only be used if
        auto_commit is disabled and the commit function never called.

        .. note:: all the messages received before message itself will be committed
                  as consequence.

        :param message: message to commit.
        :type message: Message namedtuple, which consists of: partition number,
                       offset, key, and message value
        :return: True on success, False on failure.
        """
        reqs = [
            OffsetCommitRequest(
                self.topic,
                message.partition,
                message.offset,
                None,
            )
        ]

        try:
            if self.config.offset_storage in [None, 'zookeeper', 'dual']:
                self.client.send_offset_commit_request(self.config.group_id,
                                                       reqs)
            if self.config.offset_storage in ['kafka', 'dual']:
                self.client.send_offset_commit_request_kafka(
                    self.config.group_id, reqs)
        except KafkaError as e:
            self.log.error("%s saving offsets: %s", e.__class__.__name__, e)
            return False
        else:
            return True
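
A usage sketch for the wrapper above, assuming a yelp_kafka-style config object that exposes broker_list, client_id, group_id and the SimpleConsumer arguments, with auto_commit disabled; handle is a hypothetical processing callback:

consumer = KafkaSimpleConsumer('my-topic', config, partitions=[0])
with consumer:                        # connect() on enter, close() on exit
    for message in consumer:          # Message(partition, offset, key, value)
        handle(message.value)         # hypothetical handler
        consumer.commit_message(message)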
class KafkaPythonClientSimple(PythonClient):
    def __init__(self,topic=topic_name, consumerGroup="perftest", kafkaHost=kafka_host, zookeeperHost=zookeeper_host):
        self.config["topic"] = topic
        self.config["kafkaHost"] = kafkaHost
        self.config["zookeeperHost"] = zookeeperHost
        self.config["consumerGroup"] = consumerGroup
        self.client = SimpleClient(self.config["kafkaHost"])
        super(KafkaPythonClientSimple, self).__init__()

    def createProducer(self, kafkaSync):
        self.config["kafkaSync"] = kafkaSync
        if self.config["kafkaSync"] == True:
            self.producer = SimpleProducer(self.client, async=False)
        else:
            print "ENOIMPL: async not impl. for kafka-python-simple"

    def createConsumer(self):
        self.consumer = SimpleConsumer(self.client,
            topic=self.config["topic"],
            group=self.config["consumerGroup"],
            auto_commit=True,
            max_buffer_size=3000000,
            iter_timeout=5)

    def produce(self, num_msg=20000):
        self.msgCount = num_msg
        for x in range(self.msgCount):
            self.prtProgress(x, 10000)
            self.producer.send_messages(self.config["topic"], self.msg)
        if (x >= 10000):
            sys.stdout.write('\n')

    def consume(self, num_msg=0):
        count = 0
        while True:
            message = self.consumer.get_message(block=False, timeout=1)
            # don't use "for message in self.consumer:" instead of "while..." - much slower!
            if message is None:
#               print "consume, msg is None"
                break
            if len(message) == 0:
#               print "consume, len(msg) is 0"
                break
            count += 1
            self.prtProgress(count, 10000)
        sys.stdout.write('\n')
        if num_msg > 0:
            if count != num_msg:
                print "ERROR: KafkaPythonClientSimple.consume: # of messages not as expected, read: {}, expected: {}".format(count, num_msg)
        return count

    def startProducer(self):
        pass

    def stopProducer(self):
        self.beforeFlushTimer(self.timeDict['producer'])
        self.producer.stop()

    def stopConsumer(self): pass

    def initCount(self):
        self.consume(0)

    def finalize(self): pass
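
A rough driver for the benchmark class above. The constructor defaults (topic_name, kafka_host, zookeeper_host) are module-level globals not shown here, and the PythonClient base class is assumed to supply self.config and self.msg, so placeholder values are passed explicitly:

client = KafkaPythonClientSimple(topic='perftest',
                                 kafkaHost='localhost:9092',      # placeholder
                                 zookeeperHost='localhost:2181')  # placeholder
client.createProducer(kafkaSync=True)
client.createConsumer()
client.produce(num_msg=20000)    # send 20k messages, printing progress dots
client.stopProducer()            # timing hook plus producer.stop()
client.consume(num_msg=20000)    # read them back and verify the count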
Example #19
            b"%s" % (args.offset))

    try:
        if args.broker:
            kclient = KafkaClient("%s" % (args.broker))

            # add support for more than 1 partition
            consumer = SimpleConsumer(kclient,
                                      args.consumer,
                                      args.topic,
                                      partitions=[0])
            consumer.max_buffer_size = None
            if args.offset:
                consumer.seek(0, 1)

            message = consumer.get_message()
            if message:
                print "DEBUG: restoring"
                print("MSG: " + str(message[1][3]) + "\tOFFSET: " +
                      str(message[0]) + "\t KEY: " + str(message.message.key))

        if not args.set:
            zk.set(
                '/consumers/{0}/offsets/{1}/{2}'.format(
                    args.consumer, args.topic, args.partition),
                b"%s" % (old_offset))
        else:
            print "Old offset %s" % (old_offset)
            print "New offset %s" % (args.offset)
    except Exception:
        # zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition), b"%s" % (old_offset))
        pass
Example #20
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    workers = {}
    brokers = "localhost:9092,localhost:9093,localhost:9094"
    group = "workers"

    kafka = KafkaClient(brokers, str(os.getpid()))
    cons = SimpleConsumer(kafka, group, "ctrl")
    cons.provide_partition_info()
    print "Starting control"
    end_ready = False
    while end_ready == False:
        try:
            while True:
                part, mmm = cons.get_message(timeout=None)
                mm = mmm.message
                print "Consumed ctrl " + str(mm)
                if mm.value == "start":
                    if workers.has_key(mm.key):
                        print "Dup partition %s" % mm.key
                        raise ValueError
                    else:
                        ph = UveStreamProc(brokers, int(mm.key),
                                           "uve-" + mm.key, "alarm-x" + mm.key,
                                           logging)
                        ph.start()
                        workers[int(mm.key)] = ph
                elif mm.value == "stop":
                    if workers.has_key(int(mm.key)):
                        ph = workers[int(mm.key)]
                        gevent.kill(ph)
Example #22

if __name__ == '__main__':

    # Streaming context
    conf = SparkConf().setMaster("local[2]")
    sc = SparkContext.getOrCreate(conf=conf)
    ssc = StreamingContext(sc, 1)

    # Kafka consumer for Component weight
    kafka = KafkaClient('localhost:9092')
    consumer = SimpleConsumer(kafka,
                              topic="weights",
                              group="consumer",
                              auto_offset_reset='latest')
    weights = consumer.get_message()[1]
    weight = weights.value.decode('utf-8').split(',')

    # Get model
    model = get_model(weight, pretrained=False)

    # load data from Kafka
    directKafkaStream = KafkaUtils.createDirectStream(
        ssc, ["data"], {"metadata.broker.list": "localhost:9092"})

    # parse
    test_data = directKafkaStream.map(lambda line: line[1].split(',')).map(
        lambda row: [int(x) for x in row]).map(parse_point)

    # Predict and Train
    test_data.map(lambda row: [
Example #23
File: kspy.py / Project: stonefury/devops
    data, stats = zk.get('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition))
    old_offset = data.decode()

    if args.offset:
        zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition), b"%s" % (args.offset))

    try:
        if args.broker:
            kclient = KafkaClient("%s" % (args.broker))

            # add support for more than 1 partition
            consumer = SimpleConsumer(kclient, args.consumer, args.topic, partitions=[0])
            consumer.max_buffer_size = None
            if args.offset:
                consumer.seek(0, 1)

            message = consumer.get_message()
            if message:
                print "DEBUG: restoring"
                print("MSG: " + str(message[1][3]) + "\tOFFSET: " + str(message[0]) + "\t KEY: " + str(message.message.key) )

        if not args.set:
            zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition), b"%s" % (old_offset))
        else:
            print "Old offset %s" % (old_offset)
            print "New offset %s" % (args.offset)
    except Exception:
        # zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition), b"%s" % (old_offset))
        pass

    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi is not None:
                    consumer.seek(-1,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10,timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        consumer.commit()
                        pcount += len(mlist) 
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                pause = True

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.info("New KafkaClient %d" % self._partition)
                kafka = KafkaClient(self._brokers, str(os.getpid()))
                try:
                    consumer = SimpleConsumer(kafka,
                                              self._group,
                                              self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise gevent.GreenletExit

                self._logger.info("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0, 2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                except common.OffsetOutOfRangeError:
                    mi = None
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                consumer.seek(0, 1)

                if mi is not None:
                    count = 0
                    self._logger.info("Catching Up %d" % self._partition)
                    loff = mi.offset
                    coff = 0
                    while True:
                        try:
                            mm = consumer.get_message(timeout=None)
                            count += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not process %s" %
                                                  (self._partition, str(mm)))
                                raise gevent.GreenletExit
                            consumer.commit()
                            coff = mm.offset
                            self._logger.info("Syncing offset %d" % coff)
                            if coff == loff:
                                break
                        except Exception as ex:
                            self._logger.info("Sync Error %s" % str(ex))
                            break
                    if coff != loff:
                        self._logger.info("Sync Failed for %d count %d" %
                                          (self._partition, count))
                        continue
                    else:
                        self._logger.info("Sync Completed for %d count %d" %
                                          (self._partition, count))

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mm = consumer.get_message(timeout=None)
                        if mm is None:
                            continue
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" %
                                              (self._partition, str(mm)))
                            raise gevent.GreenletExit
                    except TypeError:
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.info("Payload Error: %s" % str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                gevent.sleep(1)
        self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
        return self._partoffset, self._partdb
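
The catch-up phase above records the tail offset first, then replays from the committed position until that offset is reached. A distilled sketch of the handshake, reusing the seek semantics noted earlier (consumer assumed to exist):

# 1. Record the offset of the last message already queued.
consumer.seek(-1, 2)                       # position at the last queued message
tail = consumer.get_message(timeout=0.1)   # None if the partition is empty

# 2. Resume from the committed position and drain up to the recorded tail.
consumer.seek(0, 1)
while tail is not None:
    mm = consumer.get_message(timeout=None)
    consumer.commit()
    if mm.offset >= tail.offset:           # caught up
        break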
Example #26
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.error("New KafkaClient %d" % self._partition)
                self._kfk = KafkaClient(self._brokers, str(os.getpid()))
                try:
                    consumer = SimpleConsumer(self._kfk,
                                              self._group,
                                              self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.error("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0, 2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                if mi is not None:
                    consumer.seek(0, 1)
                else:
                    consumer.seek(0, 0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        self.resource_check()
                        mlist = consumer.get_messages(10, timeout=0.2)
                        for mm in mlist:
                            if mm is None:
                                continue
                            self._logger.debug("%d Reading offset %d" % \
                                    (self._partition, mm.offset))
                            consumer.commit()
                            pcount += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not handle %s" %
                                                  (self._partition, str(mm)))
                                raise gevent.GreenletExit
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" % str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)

        partdb = {}
        for coll in self._uvedb.keys():
            partdb[coll] = {}
            for gen in self._uvedb[coll].keys():
                partdb[coll][gen] = {}
                for tab in self._uvedb[coll][gen].keys():
                    for rkey in self._uvedb[coll][gen][tab].keys():
                        uk = tab + ":" + rkey
                        partdb[coll][gen][uk] = \
                            set(self._uvedb[coll][gen][tab][rkey].keys())

        self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
        self.stop_partition()
        return self._partoffset, partdb