def __init__(self, host, port, schema_path, topic, nbmsg, consumer_timeout):
    self.topic = topic
    self.nbmsg = nbmsg
    self.sent_msg = 0
    self.host = host
    self.port = port
    self.sent = [-100] * self.nbmsg
    self.rcv = [-100] * self.nbmsg
    self.runtag = str(random.randint(10, 100000))
    try:
        self.broker = KafkaClient("%s:%d" % (self.host, self.port))
    except Exception:
        raise ValueError("KafkaClient (%s:%d) - init failed" % (self.host, self.port))
    try:
        self.producer = SimpleProducer(self.broker)
    except Exception:
        raise ValueError("SimpleProducer (%s:%d) - init failed" % (self.host, self.port))
    try:
        self.consumer = SimpleConsumer(self.broker, "testbot", topic,
                                       iter_timeout=consumer_timeout)
    except Exception:
        raise ValueError("SimpleConsumer (%s:%d) - init failed" % (self.host, self.port))
    try:
        self.schema = avro.schema.parse(open(schema_path).read())
    except Exception:
        raise ValueError("Prod2Cons load schema (%s) - init failed" % schema_path)

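# A minimal usage sketch for the wrapper above: Avro-encode one record and
# send it through the SimpleProducer. The record fields ('id', 'msg') are
# hypothetical -- the real field names depend on the schema at schema_path.
import io

import avro.io

def send_one(self, index):
    writer = avro.io.DatumWriter(self.schema)  # serializer for the loaded schema
    buf = io.BytesIO()
    encoder = avro.io.BinaryEncoder(buf)
    writer.write({"id": index, "msg": self.runtag}, encoder)  # hypothetical record
    self.producer.send_messages(self.topic, buf.getvalue())
    self.sent_msg += 1
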
def main():
    kafka = KafkaClient("localhost:9092")
    print("Consumer established connection to kafka")
    consumer = SimpleConsumer(kafka, "my-group", "test")
    for message in consumer:
        # This will wait and print messages as they become available
        print(message)

def __init__(self, info):
    self.host = info['attributes']['host']
    self.group = info['attributes']['group']
    self.topic = info['attributes']['topic']
    self.client = KafkaClient(self.host)
    self.consumer = SimpleConsumer(self.client, self.group, self.topic)

def run(self):
    client = KafkaClient("vsu-01:9092")
    consumer = SimpleConsumer(client, "test-group", "my.price")
    for message in consumer:
        print(message)

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(ListeningKafkaSpider, cls).from_crawler(crawler, *args, **kwargs)
    if not hasattr(spider, 'topic') or not spider.topic:
        spider.topic = '%s-starturls' % spider.name
    hosts = crawler.settings.get('SCRAPY_KAFKA_HOSTS', 'localhost:9092')
    consumer_group = crawler.settings.get('SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP',
                                          'scrapy-kafka')
    _kafka = SimpleClient(hosts)
    # wait at most 1 sec for more messages, otherwise continue
    spider.consumer = SimpleConsumer(_kafka, consumer_group, spider.topic,
                                     auto_commit=True, iter_timeout=1.0)
    # the idle signal fires when the spider has no requests left;
    # that's when we schedule new requests from the kafka topic
    crawler.signals.connect(spider.spider_idle, signal=signals.spider_idle)
    crawler.signals.connect(spider.item_scraped, signal=signals.item_scraped)
    logger.info("Reading URLs from kafka topic '%s'" % spider.topic)
    return spider

def get_offsets(offsets_after_time_millis, conn_params=config.DEFAULT_CONN_PARAMS):
    curr_time = int(time.time() * 1000)
    for host in config.bagheera_nodes:
        for topic in config.topics:
            for partition in config.partitions:
                consumer = SimpleConsumer(host, conn_params['port'],
                                          conn_params['nrecs'],
                                          conn_params['bufsize'])
                offset = int(consumer.getOffsetsBefore(
                    topic, partition, offsets_after_time_millis, 1)[0])
                consumer.close()
                print(json.dumps({
                    'time_millis': curr_time,
                    'hostname': host,
                    'topic': topic,
                    'partition': partition,
                    'offset': offset
                }))

def run(self):
    client = KafkaClient("10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
    consumer = SimpleConsumer(client, "test-group", "guantest")
    for message in consumer:
        print(message.message.value)

def setup_kafka(self, settings):
    """Setup kafka connection and idle signal.

    This should be called after the spider has set its crawler object.

    :param settings: The current Scrapy settings being used
    :type settings: scrapy.settings.Settings
    """
    if not hasattr(self, 'topic') or not self.topic:
        self.topic = '%s-starturls' % self.name
    hosts = settings.get('SCRAPY_KAFKA_HOSTS', ['localhost:9092'])
    consumer_group = settings.get('SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP',
                                  'scrapy-kafka')
    _kafka = KafkaClient(hosts)
    # wait at most 1 sec for more messages, otherwise continue
    self.consumer = SimpleConsumer(_kafka, consumer_group, self.topic,
                                   auto_commit=True, iter_timeout=1.0)
    # the idle signal fires when the spider has no requests left;
    # that's when we schedule new requests from the kafka topic
    self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
    self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
    self.log("Reading URLs from kafka topic '%s'" % self.topic)

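# For context, a sketch of what the idle handler wired up above might look
# like: pull a batch of URLs from the consumer and feed them back to the
# Scrapy engine. schedule_next_request() is an assumed helper name, not part
# of the snippet above.
from scrapy.exceptions import DontCloseSpider

def schedule_next_request(self):
    for msg in self.consumer.get_messages(count=10):
        req = self.make_requests_from_url(msg.message.value)
        self.crawler.engine.crawl(req, spider=self)

def spider_idle(self):
    self.schedule_next_request()
    raise DontCloseSpider  # stay alive while waiting for new URLs on the topic
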
def __init__(self, factory, destination):
    self.factory = factory
    self.destination = destination
    self.consumer = SimpleConsumer(self.factory, "test-group", self.destination)
    self.rate = PerfRate()
    threading.Thread.__init__(self)

def run(self):
    client = KafkaClient("172.17.8.101:9092")
    consumer = SimpleConsumer(client, "test-group", "topic")
    batch_size = 300
    global_counter = 0
    counter = 0
    batch = BatchStatement()
    # prepare the statement once, outside the loop, instead of per message
    prepared = session.prepare("""
        INSERT INTO testkeyspace.meter_data
            (timestamp, id, P_1, P_2, P_3, Q_1, Q_2, Q_3)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """)
    for message in consumer:
        if counter >= batch_size:
            session.execute(batch)
            batch = BatchStatement()
            counter = 0
        temp = yaml.safe_load(message.message.value)
        global_counter += 1
        print(global_counter)
        batch.add(prepared, (temp["timestamp"], uuid.UUID(temp["id"]),
                             temp["P_1"], temp["P_2"], temp["P_3"],
                             temp["Q_1"], temp["Q_2"], temp["Q_3"]))
        counter += 1

def kafka_pull(message_queue):
    global g_conf
    global g_master_logger
    while True:
        try:
            if is_quit():
                g_master_logger.info("thread quit: [%d]" % os.getpid())
                return True
            # pick a random broker from the configured list
            random_v = random.randint(0, len(g_conf["broker_list"]) - 1)
            broker = g_conf["broker_list"][random_v]
            g_master_logger.info("use broker is [%s]" % broker)
            partition_set = set([0])  # consume partition 0 only
            # client
            client = KafkaClient(broker)
            consumer = SimpleConsumer(
                client, g_conf["msg_group_name"], g_conf["msg_topic_name"],
                partitions=partition_set,
                auto_commit_every_n=g_conf["auto_commit_every_n"],
                auto_commit_every_t=g_conf["auto_commit_every_t"],
                fetch_size_bytes=g_conf["fetch_size_bytes"],
                buffer_size=g_conf["buffer_size"],
                max_buffer_size=g_conf["max_buffer_size"])
            cnt = 0
            for message in consumer:
                cnt += 1
                if cnt % 10000 == 0:
                    g_master_logger.info("msg consumer cnt is [%d] queue:%u"
                                         % (cnt, message_queue.qsize()))
                if is_quit():
                    consumer.stop()
                    g_master_logger.info("thread fetch msg quit: [%d]" % os.getpid())
                    break
                value = message.message.value
                if value is None:
                    g_master_logger.warning("value is none, msg is [%s]" % str(message))
                    continue
                if len(value) == 0:
                    g_master_logger.warning("value len is 0, msg is [%s]" % str(message))
                    continue
                if not check_pkg(value):
                    continue
                message_queue.put(message)
        except Exception as e:
            g_master_logger.error("work error, exception is [%s], traceback is [%s]"
                                  % (e, traceback.format_exc()))
            time.sleep(5)
            continue

def __init__(self, addr, group, topic):
    self.client = KafkaClient(addr)
    self.consumer = SimpleConsumer(self.client, group, topic,
                                   max_buffer_size=1310720000)
    self.temp_file_path = None
    self.temp_file = None
    self.topic = topic
    self.group = group
    self.block_cnt = 0

def listen(self):
    client = KafkaClient(hosts(self.server_list, self.kafka_port))
    client.ensure_topic_exists(self.topic_name)
    consumer = SimpleConsumer(client, self.consumer_name, self.topic_name)
    for message in consumer:
        value = message.message.value
        print(value)

def register_consumer(self, callback, parse_json, topic_group, topic_name):
    consumer = SimpleConsumer(self.client, topic_group, topic_name,
                              max_buffer_size=None)
    consumer_thread = ConsumerThread(consumer, callback, parse_json)
    print("Starting new subscriber for topic " + topic_name +
          " with group " + topic_group)
    consumer_thread.start()

def __init__(self, cache):
    threading.Thread.__init__(self)
    self.kafka = KafkaClient(self.kafkaHost)
    self.consumer = SimpleConsumer(self.kafka, "test-group", "collector")
    self.cache = cache

def consume():
    client = KafkaClient("localhost:9092")
    consumer = SimpleConsumer(client, "test-group", "weather")
    for message in consumer:
        print(message)
    return 'Message reading..'

def dataConsumer(topic, group='default', count=1, dateStr=''):
    kafka_consumer = SimpleConsumer(KafkaClient(MasterPublicIP + ":9092"),
                                    group, topic,
                                    max_buffer_size=MAX_BUFFER_SIZE)
    messages = kafka_consumer.get_messages(count=count)
    dataList = []
    for message in messages:
        dataList.append(message.message.value)
    if len(dataList) > 0:
        flush2HDFS(dataList, dateStr)

def run(self):
    # client = KafkaClient("localhost:9092")
    client = KafkaClient("kafka_host:9092")
    # consumer = SimpleConsumer(client, "test-group", "my-topic")
    consumer = SimpleConsumer(client, "python-group", "test")
    for message in consumer:
        print(message)

def run(self):
    client = KafkaClient("10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
    consumer = SimpleConsumer(client, "test-group", "jiketest",
                              auto_commit=False, partitions=self.part)
    consumer.seek(0, 0)  # rewind to the head of the partition
    while True:
        message = consumer.get_message(True, 60)
        if message is None:  # get_message returns None on timeout
            continue
        self.__offset = message.offset
        print(message.message.value)

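# Since the consumer above runs with auto_commit=False, offsets survive a
# restart only if they are committed explicitly. A minimal sketch, assuming
# the same consumer object:
def checkpoint(consumer):
    consumer.commit()  # persist the group's current offsets
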
def __init__(self, addr, group, topic):
    self.client = KafkaClient(addr)
    self.consumer = SimpleConsumer(self.client, group, topic,
                                   max_buffer_size=1310720000,
                                   auto_commit=False)
    self.temp_file_path = None
    self.temp_file = None
    self.hadoop_path = "/user/AdReport/%s/history" % topic
    self.cached_path = "/user/AdReport/%s/cached" % topic
    self.topic = topic
    self.group = group
    self.block_cnt = 0

def __init__(self, conn_pool, topic, group):
    self.conn_pool = conn_pool
    self.topic = topic
    self.group = group
    self.kafka = KafkaClient(self.conn_pool)
    self.consumer = SimpleConsumer(self.kafka, self.group, self.topic,
                                   max_buffer_size=None)
    self.consumer.seek(0, 2)  # move to the tail of the queue

def consume_now():
    """Consume a pic from kafka."""
    client = KafkaClient(settings.KAFKA_SERVER)
    consumer = SimpleConsumer(client, "my_group", "pictures",
                              fetch_size_bytes=30000000)
    for message in consumer:
        print(message)

def __init__(self):
    self.kafka = KafkaClient(hosts=KAFKA_SERVER)
    self.consumer = SimpleConsumer(self.kafka, KAFKA_CONSUMER_GROUP, KAFKA_TOPIC,
                                   auto_commit=True,
                                   max_buffer_size=1024 * 1024)
    self.submitter = HeadquarterSubmitter(HQ_BASE_URL, HQ_JOB)
    self.stats = dict(fetched=0, scheduled=0, discarded=0)

def getLogLines(self):
    # consume alarm messages and push them to websocket clients until stopped
    while not thread_stop_event.isSet():
        client = SimpleClient('127.0.0.1:9092')
        consumer = SimpleConsumer(client, "my-producer", "alarm")
        for message in consumer:
            print(message.message.value)
            socketio.emit('newmessage',
                          {'message': message.message.value},
                          namespace='/test')
        sleep(self.delay)

def listen(self):
    client = KafkaClient(hosts(self.server_list, self.kafka_port))
    client.ensure_topic_exists(self.topic_name)
    consumer = SimpleConsumer(client, self.consumer_name, self.topic_name)
    for message in consumer:
        value = json.loads(message.message.value)
        if value['no'] % 10 == 0:
            print(value)
            subject = "test mail => " + message.message.value
            body = "Good day! Now is " + datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            send_mail(self.email_address, subject, body)

def run(self):
    client = None
    consumer = None
    try:
        print(get_kafka_hosts())
        client = KafkaClient(hosts=get_kafka_hosts())
        consumer = SimpleConsumer(client=client,
                                  group=self.groupName.encode('ascii', 'ignore'),
                                  topic=self.topic, iter_timeout=5)
        consumer.seek(0, 1)  # whence=1: start from the current offset
        print('[Kafka Consumer] START')
        print('Topic: {}'.format(self.topic))
        print('Listening incoming message...')
        print('=========================================================')
        while self.stopCpu is False:
            for message in consumer.get_messages(count=5, block=False):
                if self.stopCpu is True:
                    break
                if message:
                    offset = message.offset
                    value = message.message.value
                    print('msg: {0}, offset: {1}'.format(value, offset))
                    if len(value) > 0:
                        self.parentOj.emit("ws" + str(self.pid), value)
        print('[Kafka Consumer] STOP')
        print('Topic: {}'.format(self.topic))
        print('Stop listening...')
        print('========================================================')
        consumer.stop()
        client.close()
    except Exception:
        # guard against a failure before client/consumer were created
        if consumer is not None:
            consumer.stop()
        if client is not None:
            client.close()

def spiderIdle(self, spider):
    consumer = SimpleConsumer(self.kafka_conn, "test", "commands")
    for msg in consumer.get_messages():
        print(msg.message.value)
        if msg.message.value == spider.name + '_stop':
            print('stop')
            spider.spider_pause()
        if msg.message.value == spider.name + '_start':
            spider.spider_resume()

def __init__(self, addr, group, topic):
    """Initialize Consumer with kafka broker IP, group, and topic."""
    self.client = KafkaClient(addr)
    self.consumer = SimpleConsumer(self.client, group, topic,
                                   max_buffer_size=1310720000)
    self.temp_file_path = None
    self.temp_file = None
    self.hadoop_path = "/insight/artsy/geo"
    self.topic = topic
    self.group = group
    self.block_cnt = 0

def _build_topic_to_consumer_map(kafka_client, topics):
    """Build a mapping of topic to SimpleConsumer object for each topic.

    We use a single SimpleConsumer per topic since SimpleConsumer allows us
    to seek, but only supports a single topic.
    """
    # TODO(joshszep|DATAPIPE-2113): Update to use ConsumerGroup rather than SimpleConsumers
    return {
        topic: SimpleConsumer(client=kafka_client, group=None,
                              topic=topic, auto_commit=False)
        for topic in topics
    }

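# A usage sketch for the map built above (names are illustrative): rewind
# each per-topic SimpleConsumer to the earliest offset and drain whatever is
# available, without blocking.
def drain_all(topic_to_consumer, count=100):
    results = {}
    for topic, consumer in topic_to_consumer.items():
        consumer.seek(0, 0)  # whence=0: seek relative to the earliest offset
        results[topic] = [m.message.value
                          for m in consumer.get_messages(count=count, block=False)]
    return results
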
def setup(self):
    self.redis_conn = redis.Redis(host=self.settings.REDIS_HOST,
                                  port=self.settings.REDIS_PORT)
    self.kafka_conn.ensure_topic_exists(self.settings.KAFKA_INCOMING_TOPIC)
    self.consumer = SimpleConsumer(self.kafka_conn,
                                   self.settings.KAFKA_GROUP,
                                   self.settings.KAFKA_INCOMING_TOPIC,
                                   auto_commit=True,
                                   iter_timeout=1.0)
    self.result_method = self.get_method(self.settings.SCHEMA_METHOD)
    self.validator = self.extend_with_default(Draft4Validator)