Example #1
 def __init__(self, host, port, schema_path, topic, nbmsg, consumer_timeout):
     self.topic = topic
     self.nbmsg = nbmsg
     self.sent_msg = 0
     self.host = host
     self.port = port
     self.sent = [-100] * self.nbmsg
     self.rcv = [-100] * self.nbmsg
     self.runtag = str(random.randint(10, 100000))
     try:
         self.broker = KafkaClient("%s:%d" % (self.host, self.port))
     except Exception:
         raise ValueError(
             "KafkaClient (%s:%d) - init failed" % (self.host, self.port))
     try:
         self.producer = SimpleProducer(self.broker)
     except Exception:
         raise ValueError(
             "SimpleProducer (%s:%d) - init failed" % (self.host, self.port))
     try:
         self.consumer = SimpleConsumer(
             self.broker, "testbot", topic, iter_timeout=consumer_timeout)
     except Exception:
         raise ValueError(
             "SimpleConsumer (%s:%d) - init failed" % (self.host, self.port))
     try:
         with open(schema_path) as schema_file:
             self.schema = avro.schema.parse(schema_file.read())
     except Exception:
         raise ValueError(
             "Prod2Cons load schema (%s) - init failed" % schema_path)
Example #2
def main():
    kafka = KafkaClient("localhost:9092")
    print("Consumer established connection to kafka")
    consumer = SimpleConsumer(kafka, "my-group", "test")
    for message in consumer:
        # This will wait and print messages as they become available
        print(message)
Example #3
    def __init__(self, info):
        self.host = info['attributes']['host']
        self.group = info['attributes']['group']
        self.topic = info['attributes']['topic']

        self.client = KafkaClient(self.host)
        self.consumer = SimpleConsumer(self.client, self.group, self.topic)
Example #4
    def run(self):
        client = KafkaClient("vsu-01:9092")
        consumer = SimpleConsumer(client, "test-group", "my.price")

        for message in consumer:

            print(message)
Example #5
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(ListeningKafkaSpider,
                       cls).from_crawler(crawler, *args, **kwargs)

        if not hasattr(spider, 'topic') or not spider.topic:
            spider.topic = '%s-starturls' % spider.name

        hosts = crawler.settings.get('SCRAPY_KAFKA_HOSTS', 'localhost:9092')
        consumer_group = crawler.settings.get(
            'SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP', 'scrapy-kafka')
        _kafka = SimpleClient(hosts)
        # wait at most 1sec for more messages. Otherwise continue
        spider.consumer = SimpleConsumer(_kafka,
                                         consumer_group,
                                         spider.topic,
                                         auto_commit=True,
                                         iter_timeout=1.0)
        # idle signal is called when the spider has no requests left,
        # that's when we will schedule new requests from kafka topic
        crawler.signals.connect(spider.spider_idle, signal=signals.spider_idle)
        crawler.signals.connect(spider.item_scraped,
                                signal=signals.item_scraped)
        logger.info("Reading URLs from kafka topic '%s'" % spider.kafka_topic)

        return spider
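The spider_idle handler wired up above is what actually turns kafka messages into new requests. Its body is not shown in this example; under the assumption of an older Scrapy (where engine.crawl takes a spider argument), it could look roughly like this:

from scrapy.exceptions import DontCloseSpider
from scrapy.http import Request

def spider_idle(self):
    # drain whatever is currently waiting on the kafka topic
    for msg in self.consumer.get_messages(count=10, block=False):
        url = msg.message.value.strip()
        if url:
            self.crawler.engine.crawl(Request(url, dont_filter=True), self)
    # keep the spider alive so the idle signal fires again later
    raise DontCloseSpider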
Example #6
def get_offsets(offsets_after_time_millis,
                conn_params=config.DEFAULT_CONN_PARAMS):

    curr_time = long(time.time() * 1000)

    for host in config.bagheera_nodes:
        for topic in config.topics:
            for partition in config.partitions:
                # NOTE: this snippet targets the old host/port-style
                # SimpleConsumer API (getOffsetsBefore, Kafka 0.7 era),
                # not kafka-python's SimpleConsumer(client, group, topic).
                consumer = SimpleConsumer(host, conn_params['port'],
                                          conn_params['nrecs'],
                                          conn_params['bufsize'])

                offset = long(
                    consumer.getOffsetsBefore(topic, partition,
                                              offsets_after_time_millis, 1)[0])

                consumer.close()

                print(json.dumps({
                    'time_millis': curr_time,
                    'hostname': host,
                    'topic': topic,
                    'partition': partition,
                    'offset': offset
                }))
Example #7
    def run(self):
        client = KafkaClient(
            "10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
        consumer = SimpleConsumer(client, "test-group", "guantest")

        for message in consumer:
            print(message.message.value)
Example #8
    def setup_kafka(self, settings):
        """Setup redis connection and idle signal.

        This should be called after the spider has set its crawler object.

        :param settings: The current Scrapy settings being used
        :type settings: scrapy.settings.Settings
        """
        if not hasattr(self, 'topic') or not self.topic:
            self.topic = '%s-starturls' % self.name

        hosts = settings.get('SCRAPY_KAFKA_HOSTS', ['localhost:9092'])
        consumer_group = settings.get('SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP',
                                      'scrapy-kafka')
        _kafka = KafkaClient(hosts)
        # wait at most 1sec for more messages. Otherwise continue
        self.consumer = SimpleConsumer(_kafka,
                                       consumer_group,
                                       self.topic,
                                       auto_commit=True,
                                       iter_timeout=1.0)
        # idle signal is called when the spider has no requests left,
        # that's when we will schedule new requests from kafka topic
        self.crawler.signals.connect(self.spider_idle,
                                     signal=signals.spider_idle)
        self.crawler.signals.connect(self.item_scraped,
                                     signal=signals.item_scraped)
        self.log("Reading URLs from kafka topic '%s'" % self.kafka_topic)
Example #9
 def __init__(self, factory, destination):
     self.factory = factory
     self.destination = destination
     self.consumer = SimpleConsumer(self.factory, "test-group",
                                    self.destination)
     self.rate = PerfRate()
     threading.Thread.__init__(self)
Example #10
    def run(self):
        client = KafkaClient("172.17.8.101:9092")
        consumer = SimpleConsumer(client, "test-group", "topic")

        batch_size = 300
        global_counter = 0
        counter = 0
        batch = BatchStatement()

        # prepare the INSERT statement once, outside the consume loop
        prepared = session.prepare("""
                INSERT INTO testkeyspace.meter_data (timestamp, id, P_1, P_2, P_3, Q_1, Q_2, Q_3)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """)

        for message in consumer:
            if counter >= batch_size:
                session.execute(batch)
                batch = BatchStatement()
                counter = 0

            temp = yaml.load(message.message.value)
            global_counter += 1
            print global_counter
            batch.add(prepared, (temp["timestamp"], uuid.UUID(temp["id"]),
                                 temp["P_1"], temp["P_2"], temp["P_3"],
                                 temp["Q_1"], temp["Q_2"], temp["Q_3"]))
            counter += 1
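Because the loop above only flushes once batch_size messages have accumulated, any trailing partial batch is lost if iteration ever stops (for instance when the consumer is built with an iter_timeout). A small guard, added here as a suggestion rather than taken from the original, covers that case:

        # after the for-loop over consumer ends, flush any partially filled batch
        if counter > 0:
            session.execute(batch)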
Example #11
def kafka_pull(message_queue):
    global g_conf
    global g_master_logger
    ret = True
    while True:
        try:
            if is_quit():
                g_master_logger.info("thread quit: [%d]" % os.getpid())
                return True

            random_v = random.randint(0, len(g_conf["broker_list"]) - 1)
            broker = g_conf["broker_list"][random_v]
            g_master_logger.info("use broker is [%s]" % broker)
            partition_set = set([0])

            # client
            client = KafkaClient(broker)
            consumer = SimpleConsumer(
                client,
                g_conf["msg_group_name"],
                g_conf["msg_topic_name"],
                partitions=partition_set,
                auto_commit_every_n=g_conf["auto_commit_every_n"],
                auto_commit_every_t=g_conf["auto_commit_every_t"],
                fetch_size_bytes=g_conf["fetch_size_bytes"],
                buffer_size=g_conf["buffer_size"],
                max_buffer_size=g_conf["max_buffer_size"])

            cnt = 0
            for message in consumer:
                cnt += 1
                if cnt % 10000 == 0:
                    g_master_logger.info("msg consumer cnt is [%d] queue:%u" %
                                         (cnt, message_queue.qsize()))
                if is_quit():
                    consumer.stop()
                    g_master_logger.info("thread fetch msg quit: [%d]" %
                                         os.getpid())
                    break

                value = message.message.value
                if value is None:
                    g_master_logger.warning("value is none, msg is [%s]" %
                                            str(message))
                    continue
                if len(value) == 0:
                    g_master_logger.warning("value len is 0, msg is [%s]" %
                                            str(message))
                    continue
                if not check_pkg(value):
                    continue
                message_queue.put(message)

        except Exception as e:
            g_master_logger.error(
                "work error, exception is [%s], traceback is [%s]" %
                (e, traceback.format_exc()))
            time.sleep(5)
            continue
Example #12
 def __init__(self, addr, group, topic):
     self.client = KafkaClient(addr)
     self.consumer = SimpleConsumer(self.client, group, topic,
                                    max_buffer_size=1310720000)
     self.temp_file_path = None
     self.temp_file = None
     self.topic = topic
     self.group = group
     self.block_cnt = 0
Example #13
 def listen(self):
     client = KafkaClient(hosts(self.server_list, self.kafka_port))
     client.ensure_topic_exists(self.topic_name)
     # print client.topic_partitions()
     consumer = SimpleConsumer(client, self.consumer_name, self.topic_name)
     for message in consumer:
         value = message.message.value
         print value
Example #14
 def register_consumer(self, callback, parse_json, topic_group, topic_name):
     consumer = SimpleConsumer(self.client,
                               topic_group,
                               topic_name,
                               max_buffer_size=None)
     consumer_thread = ConsumerThread(consumer, callback, parse_json)
     print "Starting new subscriber for topic " + topic_name + ' with group ' + topic_group
     consumer_thread.start()
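ConsumerThread itself is not shown in Example #14; a plausible minimal implementation, inferred only from how it is constructed and started above, might be:

import json
import threading

class ConsumerThread(threading.Thread):
    def __init__(self, consumer, callback, parse_json):
        threading.Thread.__init__(self)
        self.consumer = consumer
        self.callback = callback
        self.parse_json = parse_json
        self.daemon = True

    def run(self):
        # hand every received payload to the registered callback
        for message in self.consumer:
            value = message.message.value
            if self.parse_json:
                value = json.loads(value)
            self.callback(value)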
Example #15
    def __init__(self, cache):

        threading.Thread.__init__(self)

        self.kafka = KafkaClient(self.kafkaHost)
        self.consumer = SimpleConsumer(self.kafka, "test-group", "collector")

        self.cache = cache
Example #16
def consume():

    client = KafkaClient("localhost:9092")
    consumer = SimpleConsumer(client, "test-group", "weather")

    for message in consumer:
        print(message)

    return 'Message reading..'
Example #17
def dataConsumer(topic, group='default', count=1, dateStr=''):
    kafka_consumer = SimpleConsumer(KafkaClient(MasterPublicIP + ":9092"),
                                    group, topic,
                                    max_buffer_size=MAX_BUFFER_SIZE)
    messages = kafka_consumer.get_messages(count=count)
    dataList = []
    for message in messages:
        dataList.append(message.message.value)
    if len(dataList) > 0:
        flush2HDFS(dataList, dateStr)
Example #18
    def run(self):
        # client = KafkaClient("localhost:9092")
        client = KafkaClient("kafka_host:9092")
        # consumer = SimpleConsumer(client, "test-group", "my-topic")
        consumer = SimpleConsumer(client, "python-group", "test")

        for message in consumer:
            print(message)
Example #19
    def run(self):
        client = KafkaClient("10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
        consumer = SimpleConsumer(client, "test-group", "jiketest",
                                  auto_commit=False, partitions=self.part)

        consumer.seek(0, 0)

        while True:
            message = consumer.get_message(True, 60)
            if message is None:
                # get_message timed out without receiving anything
                continue
            self.__offset = message.offset
            print message.message.value
Example #20
 def __init__(self, addr, group, topic):
     self.client = KafkaClient(addr)
     self.consumer = SimpleConsumer(self.client, group, topic,
                                    max_buffer_size=1310720000,
                                    auto_commit=False)
     self.temp_file_path = None
     self.temp_file = None
     self.hadoop_path = "/user/AdReport/%s/history" %(topic)
     self.cached_path = "/user/AdReport/%s/cached" %(topic)
     self.topic = topic
     self.group = group
     self.block_cnt = 0
Example #21
 def __init__(self, conn_pool, topic, group):
     self.conn_pool = conn_pool
     self.topic = topic
     self.group = group
     self.kafka = KafkaClient(self.conn_pool)
     self.consumer = SimpleConsumer(self.kafka,
                                    self.group,
                                    self.topic,
                                    max_buffer_size=None)
     self.consumer.seek(0, 2)  # move to the tail of the queue
Example #22
 def consume_now():
     """
     consume a pic from kafka
     """
     client = KafkaClient(settings.KAFKA_SERVER)
     consumer = SimpleConsumer(client,
                               "my_group",
                               "pictures",
                               fetch_size_bytes=30000000)
     for message in consumer:
         print message
Example #23
    def __init__(self):
        self.kafka = KafkaClient(hosts=KAFKA_SERVER)
        self.consumer = SimpleConsumer(self.kafka,
                                       KAFKA_CONSUMER_GROUP,
                                       KAFKA_TOPIC,
                                       auto_commit=True,
                                       max_buffer_size=1024 * 1024)

        self.submitter = HeadquarterSubmitter(HQ_BASE_URL, HQ_JOB)

        self.stats = dict(fetched=0, scheduled=0, discarded=0)
Example #24
    def getLogLines(self):
        # keep pulling kafka messages until the thread is told to stop
        while not thread_stop_event.isSet():
            client = SimpleClient('127.0.0.1:9092')
            consumer = SimpleConsumer(client, "my-producer", "alarm")

            for message in consumer:
                print message.message.value
                socketio.emit('newmessage', {'message': message.message.value},
                              namespace='/test')
                sleep(self.delay)
Example #25
 def listen(self):
     client = KafkaClient(hosts(self.server_list, self.kafka_port))
     client.ensure_topic_exists(self.topic_name)
     consumer = SimpleConsumer(client, self.consumer_name, self.topic_name)
     for message in consumer:
         value = message.message.value
         value = json.loads(value)
         if value['no'] % 10 == 0:
             print value
             subject = "test mail => " + message.message.value
             body = "Good day! Now is " + datetime.now().strftime('%Y-%m-%d %H:%M:%S')
             send_mail(self.email_address, subject, body)
Example #26
        def run(self):
            client = None
            consumer = None
            try:
                prev = None
                # print("Starting Kafka Client")
                # print("Kafka topic: {}").format(self.topic)
                print get_kafka_hosts()
                client = KafkaClient(hosts=get_kafka_hosts())
                consumer = SimpleConsumer(client=client,
                                          group=self.groupName.encode(
                                              'ascii', 'ignore'),
                                          topic=self.topic,
                                          iter_timeout=5)
                consumer.seek(0, 1)
                print '[Kafka Consumer] START'
                print 'Topic: {}'.format(self.topic)
                print 'Listening incoming message...'
                print '========================================================='
                # print("Listening kafka message...")

                while self.stopCpu is False:
                    for message in consumer.get_messages(count=5, block=False):
                        if self.stopCpu is True:
                            # print("Kafka Consumer Listening Stopped")
                            break

                        if message:
                            offset = message.offset
                            value = message.message.value
                            print 'msg: {0}, offset: {1}'.format(value, offset)

                            if len(value) > 0:
                                # chartdata = []
                                # j_val = json.loads(value)
                                # j_val['offset'] = offset
                                # chartdata.append(j_val)
                                # print("destination => ws"+str(self.pid))
                                # self.parentOj.emit("ws"+str(self.type), chartdata)
                                # self.parentOj.emit(self.topic, value)
                                self.parentOj.emit("ws" + str(self.pid), value)

                print '[Kafka Consumer] STOP'
                print 'Topic: {}'.format(self.topic)
                print 'Stop listening...'
                print '========================================================'
                # print("Listening kafka Stopped")
                consumer.stop()
                client.close()
            except Exception as e:
                print '[Kafka Consumer] ERROR: {}'.format(e)
                # consumer/client may still be None if setup itself failed
                if consumer is not None:
                    consumer.stop()
                if client is not None:
                    client.close()
Example #27
    def spiderIdle(self, spider):
        consumer = SimpleConsumer(self.kafka_conn, "test", "commands")
        for msg in consumer.get_messages():
            print msg.message.value
            if msg.message.value == spider.name + '_stop':
                print 'stop'
                spider.spider_pause()
                #spider.close(spider,'ok')
                #self.scrapy.engine.close_spider(spider, 'closespider_itemcount')

            if msg.message.value == spider.name + '_start':
                #self.scrapy.engine.scraper.open_spider(spider)
                spider.spider_resume()
Example #28
 def __init__(self, addr, group, topic):
     """Initialize Consumer with kafka broker IP, group, and topic."""
     self.client = KafkaClient(addr)
     self.consumer = SimpleConsumer(self.client,
                                    group,
                                    topic,
                                    max_buffer_size=1310720000)
     self.temp_file_path = None
     self.temp_file = None
     self.hadoop_path = "/insight/artsy/geo"
     self.topic = topic
     self.group = group
     self.block_cnt = 0
Example #29
def _build_topic_to_consumer_map(kafka_client, topics):
    """Build a mapping of topic to SimpleConsumer object for each topic. We
    use a single SimpleConsumer per topic since SimpleConsumer allows us to
    seek, but only supports a single topic.
    """
    # TODO(joshszep|DATAPIPE-2113): Update to use ConsumerGroup rather than SimpleConsumers
    return {
        topic: SimpleConsumer(client=kafka_client,
                              group=None,
                              topic=topic,
                              auto_commit=False)
        for topic in topics
    }
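Since the whole point of one SimpleConsumer per topic is the ability to seek, a caller might use the returned map roughly as follows (the topic names and the printing are illustrative assumptions):

topic_to_consumer = _build_topic_to_consumer_map(kafka_client, ['events', 'logs'])
for topic, consumer in topic_to_consumer.items():
    consumer.seek(0, 0)  # rewind to the earliest available offset
    for msg in consumer.get_messages(count=100, block=False):
        print('%s@%d: %s' % (topic, msg.offset, msg.message.value))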
Example #30
    def setup(self):
        self.redis_conn = redis.Redis(host=self.settings.REDIS_HOST,
                                      port=self.settings.REDIS_PORT)

        self.kafka_conn.ensure_topic_exists(self.settings.KAFKA_INCOMING_TOPIC)
        self.consumer = SimpleConsumer(self.kafka_conn,
                                       self.settings.KAFKA_GROUP,
                                       self.settings.KAFKA_INCOMING_TOPIC,
                                       auto_commit=True,
                                       iter_timeout=1.0)

        self.result_method = self.get_method(self.settings.SCHEMA_METHOD)

        self.validator = self.extend_with_default(Draft4Validator)
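extend_with_default() is not shown in Example #30; it most likely follows the standard jsonschema recipe for filling in schema defaults during validation, sketched below under that assumption (the original is a method on the class rather than a free function):

from jsonschema import Draft4Validator, validators

def extend_with_default(validator_class):
    validate_properties = validator_class.VALIDATORS["properties"]

    def set_defaults(validator, properties, instance, schema):
        # apply any "default" values before running the normal checks
        for prop, subschema in properties.items():
            if "default" in subschema:
                instance.setdefault(prop, subschema["default"])
        for error in validate_properties(validator, properties, instance, schema):
            yield error

    return validators.extend(validator_class, {"properties": set_defaults})

# usage: build a Draft4 validator class that also injects defaults
DefaultFillingDraft4Validator = extend_with_default(Draft4Validator)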