from utils import insert_data
from pymongo import MongoClient
from kafka import KafkaConsumer

# mongo =====
## Connect
cliente = MongoClient('mongodb://*****:*****@localhost:27017/')
## Select a database
banco = cliente.stagioptr
## collection
album = banco.feeding

# kafka ======
consumer = KafkaConsumer('feeding')
print('Starting Feeding Consumer!')
for message in consumer:
    insert_data(message, album, topic='feeding')

from bson.json_util import dumps, loads
from xmlrpc.client import ServerProxy
from kafka import KafkaConsumer
from time import sleep

sleep(20)

consumer = KafkaConsumer(
    bootstrap_servers=['localhost:9092'],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='jeff-group',
    value_deserializer=lambda x: loads(x.decode('utf-8')))
consumer.subscribe(['rocketTopic', 'rocketSTopic'])

for msg in consumer:
    message = msg.value
    topic_retrieve = msg.topic
    if message['action'] == "running" and topic_retrieve == "rocketTopic":
        print(message['rocketName'] + " FIRST STAGE || " + " at position " + message['state'])
    elif message['action'] == "destroy" and topic_retrieve == "rocketTopic":
        print(message['msg'])
    elif message['action'] == "running" and topic_retrieve == "rocketSTopic":
        print(message['rocketName'] + " SECOND STAGE || " + " at position " + message['state'])

from kafka import KafkaConsumer
import json
import time
from elasticsearch import Elasticsearch

# Wait for 15 seconds to make sure Kafka is up and running
time.sleep(15)

while True:
    consumer = KafkaConsumer('new-listings-topic',
                             group_id='listing-indexer',
                             bootstrap_servers=['kafka:9092'])
    es = Elasticsearch(['es'])
    for message in consumer:
        # take a message from the queue
        new_listing = json.loads((message.value).decode('utf-8'))
        # push to ES (new_listing is already a dict, so read the id directly)
        es.index(index='listing_index',
                 doc_type='listing',
                 id=new_listing['id'],
                 body=new_listing)
        es.indices.refresh(index="listing_index")

# -*- coding:utf-8 -*-
from kafka import KafkaConsumer
from config.kafka_config import kafka_config
from cassandra.cluster import Cluster
import json

kafkaConfig = kafka_config()
consumer = KafkaConsumer(
    bootstrap_servers=kafkaConfig.bootstrap_servers,
    auto_offset_reset='earliest',
    consumer_timeout_ms=1000,
    value_deserializer=lambda m: json.loads(m.decode('utf-8')))
consumer.subscribe([kafkaConfig.topic])

cluster = Cluster()  # Initialize Cassandra
session = cluster.connect("depot_task_statistics")

for msg in consumer:
    # value_deserializer already returns a dict, so no second json.loads is needed
    consumeValue = msg.value
    upd_query = "UPDATE daily_task_status_count " \
                "SET task_count=task_count+1 " \
                "WHERE " \
                "task_date='{0}' " \
                "AND depot='{1}' " \
                "AND task_status='{2}'"\
                .format(consumeValue['trackedAt'][0:10],
                        consumeValue['depot'],
                        consumeValue['task_status'])
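    # Not shown in the original fragment: with the DataStax cassandra-driver,
    # applying the prepared statement would presumably be a plain execute call.
    session.execute(upd_query)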

from kafka import KafkaConsumer
import json
import watson
import config as cfg
import requests
from bs4 import BeautifulSoup

topic_name = 'nowPlaying'
consumer = KafkaConsumer(
    topic_name,
    bootstrap_servers=cfg.es_brokers,
    security_protocol='SASL_SSL',
    sasl_mechanism='PLAIN',
    sasl_plain_username='******',
    auto_offset_reset='latest',
    enable_auto_commit=True,
    auto_commit_interval_ms=5000,
    fetch_max_bytes=128,
    max_poll_records=100,
    sasl_plain_password=cfg.es_apikey,
    value_deserializer=lambda x: json.loads(x.decode('utf-8')))

lista = []
for message in consumer:
    tweets = json.loads(json.dumps(message.value))
    # print(json.dumps(tweets, indent=2))
    for j in tweets['entities']['urls']:
        if "open.spotify.com/track" in j['expanded_url']:
            # watson.analyze(tweets['text'])
            page = requests.get(j['expanded_url'])
            soup = BeautifulSoup(page.content, 'html.parser')
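            # Hedged continuation (the fragment stops after parsing the page):
            # the <title> of a Spotify track page carries the track name;
            # appending it to `lista` is an assumption about what the list is for.
            track_title = soup.title.string if soup.title else ''
            lista.append(track_title)
            print(track_title)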

from kafka import KafkaConsumer
from json import loads

f = open('/home/fieldemployee/Kafka_Shakespeare.txt', 'w')

consumer = KafkaConsumer(
    'bigdata',
    bootstrap_servers=['localhost:9096', 'localhost:9097', 'localhost:9098'],
    auto_offset_reset='earliest')

for message in consumer:
    message = message.value
    f.write(message.decode("utf-8"))

f.close()

import argparse
import atexit

import happybase
from kafka import KafkaConsumer

parser = argparse.ArgumentParser()
# kafka
parser.add_argument('topic_name')
parser.add_argument('kafka_broker')
# hbase
parser.add_argument('data_table')
parser.add_argument('hbase_host')

# Parse arguments.
args = parser.parse_args()
topic_name = args.topic_name
kafka_broker = args.kafka_broker
data_table = args.data_table
hbase_host = args.hbase_host

# Initiate a simple kafka consumer.
kafka_consumer = KafkaConsumer(topic_name, bootstrap_servers=kafka_broker)

# Initiate a hbase connection.
hbase_connection = happybase.Connection(hbase_host)

# Create table if not exists.
hbase_tables = [table.decode() for table in hbase_connection.tables()]
# TODO indexing
if data_table not in hbase_tables:
    hbase_connection.create_table(data_table, {'family': dict()})

# Setup proper shutdown hook.
atexit.register(shutdown_hook, kafka_consumer, hbase_connection)

# Start consuming kafka and writing to hbase.
# get content and exclude headers etc.
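# A hedged sketch of the consume loop the comment above announces (not in the
# original fragment); the 'family:payload' column name is an assumption.
table = hbase_connection.table(data_table)
for record in kafka_consumer:
    row_key = '%s-%d-%d' % (record.topic, record.partition, record.offset)
    table.put(row_key.encode('utf-8'), {b'family:payload': record.value})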

def kafka_consumer(self, **configs):
    brokers = '%s:%d' % (self.server.host, self.server.port)
    consumer = KafkaConsumer(self.topic, bootstrap_servers=brokers, **configs)
    return consumer

# Need kafka-python package
from time import sleep
from kafka import KafkaAdminClient
from kafka import KafkaConsumer
from kafka.admin import NewPartitions
import re

bootstrap_servers = "kafka-customers-c-1-v1.corp.itcd.ru"
topic_pattern = ".*CDP.Scenarios.Runtime"
need_partitions = 4
sleep_after = 20
sleeping_time = 20

admin_client = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
consumer = KafkaConsumer(group_id='test', bootstrap_servers=bootstrap_servers)

topics = consumer.topics()
cdp_topics = [topic for topic in topics if re.match(topic_pattern, topic)]
topics_amount = len(cdp_topics)

counter = 0
for topic in cdp_topics:
    partitions = len(consumer.partitions_for_topic(topic))
    if partitions < need_partitions:
        print(
            f'Increase number of partitions in topic {topic} to {need_partitions}'
        )
        counter += 1
        topic_partitions = {}
        topic_partitions[topic] = NewPartitions(total_count=need_partitions)
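        # Hedged continuation (the fragment stops here): submit the new partition
        # count via the admin client, pausing every `sleep_after` topics as the
        # settings above suggest.
        admin_client.create_partitions(topic_partitions)
        if counter % sleep_after == 0:
            print(f'Processed {counter} of {topics_amount} topics, sleeping {sleeping_time}s')
            sleep(sleeping_time)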

# Slide fetching can fail, so fetch slides until success
slide_fetched = False
while not slide_fetched:
    try:
        slide_fetched = fetch_slide()
    except Exception as e:
        print(f"Unexpected error when fetching a slide. {str(e)}")


if __name__ == '__main__':
    print('Spider starting..')
    consumer = KafkaConsumer(
        TOPIC_NAME,
        auto_offset_reset='latest',
        bootstrap_servers=BOOTSTRAP_SERVERS,
        api_version=(0, 10),
        consumer_timeout_ms=1000,
        enable_auto_commit=True,
        value_deserializer=lambda x: json.loads(x.decode('utf-8')))
    consumer.subscribe([TOPIC_NAME])

    # Poll messages from the topic
    try:
        while True:
            # Response format is {TopicPartition('topic1', 1): [msg1, msg2]}
            msg_pack = consumer.poll(
                timeout_ms=1000,  # Wait for 1s when no data in buffer
                max_records=1)    # Poll maximum 1 record at a time
            for tp, messages in msg_pack.items():
                for message in messages:
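                    # Hedged guess at the missing loop body: the retry block at
                    # the top of the file suggests each message triggers a fetch.
                    print(f"Received task: {message.value}")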

    except Exception as ex:
        print('Exception while parsing')
        print(str(ex))
    finally:
        return json.dumps(rec)


if __name__ == '__main__':
    print('Running Consumer..')
    parsed_records = []
    topic_name = 'raw_recipes'
    parsed_topic_name = 'parsed_recipes'

    try:
        consumer = KafkaConsumer(topic_name,
                                 auto_offset_reset='earliest',
                                 bootstrap_servers=['192.168.30.94:9092'],
                                 api_version=(0, 10, 2, 0),
                                 consumer_timeout_ms=1000)
    except Exception as ex:
        print("Something went wrong while connecting", ex)

    for msg in consumer:
        html = msg.value
        result = parse(html)
        parsed_records.append(result)
    consumer.close()
    sleep(5)

    if len(parsed_records) > 0:
        print('Publishing records..')
        producer = connect_kafka_producer()
        for rec in parsed_records:
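            # Hedged completion of the publish loop (the fragment stops mid-loop);
            # assumes connect_kafka_producer() returns a plain KafkaProducer.
            producer.send(parsed_topic_name, rec.encode('utf-8'))
        producer.flush()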

import os
import json
import time
import pymongo
from kafka import KafkaConsumer
from pymongo import MongoClient

kafka_topic = "topic1"

if __name__ == '__main__':
    client = MongoClient("mongodb://192.168.99.100:27017")
    db = client.dbtwitter

    time.sleep(10)  # wait until Kafka is running
    kafka_service = os.environ['KAFKA_SERVICE']
    print("Consumer is using kafka service {0}".format(kafka_service))

    consumer = KafkaConsumer(bootstrap_servers=[kafka_service], api_version=(0, 10))
    consumer.subscribe(kafka_topic)

    for msg in consumer:
        result = db.tbcanada.insert_one(json.loads(msg.value.decode("utf-8")))
        # print(msg.value.decode("utf-8"))

from kafka import KafkaProducer, KafkaConsumer, TopicPartition
import argparse
import json
import os
import io

print("Beginning reading mock_twitter_stream messages into spark_input")

kafka_topic = 'mock_twitter_stream'
number = 9974

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         value_serializer=lambda v: json.dumps(v).encode('utf-8'))

partitions = consumer.partitions_for_topic(kafka_topic)
topic_partitions = list()
for partition in partitions:
    topic_partitions.append(TopicPartition(kafka_topic, partition))
consumer.assign(topic_partitions)
consumer.seek_to_end()

topic_partition_to_offset = dict()
for topic_partition in topic_partitions:
    next_offset = consumer.position(topic_partition)
    reduced_offset = max(next_offset - number, 0)
    topic_partition_to_offset[topic_partition] = reduced_offset

for topic_partition, offset in topic_partition_to_offset.items():
    consumer.seek(topic_partition, offset)
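# Hedged sketch of the replay step (the fragment ends after seeking): re-emit the
# rewound messages to a 'spark_input' topic, as the banner above suggests; the
# stop condition is an assumption.
replayed = 0
for message in consumer:
    producer.send('spark_input', json.loads(message.value.decode('utf-8')))
    replayed += 1
    if replayed >= number * len(topic_partitions):
        break
producer.flush()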

import sys

from controller import commLogController
from pytz import timezone
from datetime import datetime
from kafka import KafkaConsumer
from json import loads

broker_address = "103.56.148.215"  # "161.117.58.227"
port = "9092"
group_id = 'SEMAR-IoT-Platform'
topic_list = {}
main_folder = 'data'
topic_active = ['kafka-service-subscribe', 'kafka-service-unsubscribe']

consumer = KafkaConsumer(bootstrap_servers=[broker_address + ':' + port],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         group_id=group_id,
                         value_deserializer=lambda x: loads(x.decode('utf-8')))
consumer.subscribe(topic_active)
print(topic_active)
sys.stdout.flush()


def subscribe_list():
    query = {"active": True, "channel_type": "kafka"}
    result = comChannelController.find(query)
    for val in result['data']:
        topic_list[val['topic']] = val['channel_code']
        consumer.subscribe([val['topic']])
        print("Subscribe Topic: " + val['topic'])
        sys.stdout.flush()

import json

from kafka import KafkaConsumer

from code.servers import bootstrap_servers
from code.topics import TOPIC_1

consumer = KafkaConsumer(
    value_deserializer=lambda m: json.loads(m),
    bootstrap_servers=bootstrap_servers,
    group_id='my-group',
)
consumer.subscribe([TOPIC_1])

for message in consumer:
    print(
        f'{message.topic}:{message.partition}:{message.offset} key={message.key} value={message.value}'
    )

    }
    timestamp = datetime.utcfromtimestamp(msg_dict['Timestamp'])  # assume timestamp is UTC
    msg_dict['MessageDate'] = timestamp.isoformat()
    if msg_dict['Position.satellites'] < 3:
        msg_dict["Position.lon"] = -1
        msg_dict["Position.lat"] = -1
    if msg_dict['Position.altitude'] < 0:
        msg_dict['Position.altitude'] = 0
    return msg_dict


if __name__ == '__main__':
    topic = 'VinVehicle'
    consumer = KafkaConsumer(topic,
                             bootstrap_servers='localhost:9092',
                             value_deserializer=lambda x: x.decode('utf-8'),
                             auto_offset_reset='earliest')
    prod = KafkaProducer(
        bootstrap_servers='localhost:9092',
        value_serializer=lambda x: json.dumps(x).encode('utf-8'))

    count = 1
    for msg in consumer:
        print(f"retrieved message {count} from topic {topic}")
        dict_msg = process_msg(msg.value)
        realtime_msg = {
            "VinVehicle": dict_msg["VinVehicle"],
            "Position.lon": dict_msg["Position.lon"],
            "Position.lat": dict_msg["Position.lat"],
            "Position.altitude": dict_msg["Position.altitude"],
            "Position.heading": dict_msg["Position.heading"],
            "Position.speed": dict_msg["Position.speed"],
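            # (Assumed continuation -- the original fragment is cut off inside this
            # dict; the 'VinVehicleRealtime' topic name below is hypothetical.)
        }
        prod.send('VinVehicleRealtime', realtime_msg)
        count += 1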

def startApplication():
    logger.info("starting isbn-lookup-service consumer")
    consumer = KafkaConsumer(KAFKA_TOPIC,
                             group_id=KAFKA_GROUP_ID,
                             max_poll_records=KAFKA_MAX_POLL_RECORDS,
                             bootstrap_servers=[KAFKA_BROKERS],
                             api_version=(1, 1, 0))
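    # Hedged sketch -- the rest of the function is not shown; a minimal poll loop
    # for the lookup service might look like this.
    for record in consumer:
        logger.info("received isbn lookup request: %s", record.value)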

It waits for new purchases. It reviews them and publishes the review.
"""
import random
import time

from kafka import KafkaConsumer, KafkaProducer
from kafka.errors import KafkaError

BOTTLE_PURCHASE_TOPIC = "bottle-purchase"
BOTTLE_REVIEW_TOPIC = "bottle-review"

consumer = KafkaConsumer(
    BOTTLE_PURCHASE_TOPIC,
    bootstrap_servers=["localhost:9092"],
    # auto_offset_reset="earliest",
    # api_version=(0, 10),
    # consumer_timeout_ms=1000,
)
producer = KafkaProducer(bootstrap_servers=["localhost:9092"])


def main():
    # Consuming bottle-purchase.
    for message in consumer:
        key = message.key
        if key:
            key = key.decode("utf-8")
        value = message.value
        if value:
            value = value.decode("utf-8")
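        # Hedged continuation (not in the fragment): the docstring says the
        # service reviews each purchase and publishes the review; the review
        # text below is an assumption.
        review = f"Reviewed purchase {key}: {value}"
        producer.send(BOTTLE_REVIEW_TOPIC, review.encode("utf-8"))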

from kafka import KafkaConsumer
from pymongo import MongoClient
from json import loads

KAFKA_VERSION = (0, 10)

consumer = KafkaConsumer(
    'numtest',
    bootstrap_servers=['localhost:9092'],
    # Controls where the consumer restarts reading when there is no committed offset
    # (e.g. after breaking down or being turned off); can be set to either *earliest*
    # or *latest*. When set to *latest*, the consumer starts reading at the end of
    # the log; when set to *earliest*, it starts reading at the beginning of the log.
    auto_offset_reset='earliest',
    # Makes sure the consumer commits its read offset every interval.
    enable_auto_commit=True,
    # auto_commit_interval_ms=1000,  # sets the interval between two commits; since
    # messages arrive every five seconds, committing every second seems fair.
    group_id='my-group',
    api_version=KAFKA_VERSION,
    value_deserializer=lambda x: loads(x.decode('utf-8')))

client = MongoClient('localhost:27017')
collection = client.numtest.numtest

for message in consumer:
    message = message.value
    collection.insert_one(message)
    print('{} added to {}'.format(message, collection))

from kafka import KafkaConsumer
import json
import pymongo
from json import loads, dumps

consumer = KafkaConsumer('pokemon',
                         bootstrap_servers=['localhost:29092'],
                         enable_auto_commit=True,
                         auto_commit_interval_ms=5000,
                         value_deserializer=lambda x: loads(x.decode('utf-8')),
                         auto_offset_reset='earliest')

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["pokemons"]
mycollect = mydb["pokemon"]

for msg in consumer:
    pokemon_name = msg.value['forms'][0]['name']
    print(pokemon_name)
    mycollect.insert_one(msg.value)
    print('Inserted into MongoDB!')

    region = "wgie"
else:
    region = mirrors[-1].split("-")[0]

topic = 'wgdp-kafka-production-' + region

try:
    producer = KafkaProducer(bootstrap_servers=mirrors)
    message = bytes(str(int(time.time())).encode("utf-8"))
    producer.send(topic, message)
except:
    current_time = datetime.datetime.now().isoformat()
    hostname = os.uname()[1].split(".")[0]
    logging.critical(
        "{} {} No available brokers to check".format(current_time, hostname)
    )

consumer = KafkaConsumer(topic,
                         group_id='mmchecker',
                         bootstrap_servers=centers,
                         consumer_timeout_ms=15000,
                         auto_commit_interval_ms=4000)

if not os.path.exists(lagfile):
    f = open(lagfile, "w")
    f.write("0")
f = open(lagfile, "r+")
prelag = int(f.read())

messages = []
for msg in consumer:
    messages.append(msg.value)
messages.sort()

if len(messages) > 0:
    c_time = int(time.time())
    delay = int((c_time - int(messages[-1])) / 60 / 5)
else:
    delay = prelag + 1

f.seek(0)
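# Hedged continuation (the fragment ends after the seek): the lag file read above
# suggests the new delay is written back in place.
f.write(str(delay))
f.truncate()
f.close()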

from kafka import KafkaConsumer
from setup import topicName
from elasticsearch import Elasticsearch
from secrets import main_host, username, password, host
import json

# To consume latest messages and auto-commit offsets
consumer = KafkaConsumer(topicName,
                         group_id='kafka_elastic_search_v1',
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='earliest')

if consumer.bootstrap_connected() == False:
    raise Exception("You aren't connected to the kafka server")


def getIDfromJSON(message):
    message = message.decode()
    message = json.loads(message)
    _id = int(message['id_str'])
    return _id


es = Elasticsearch(hosts=[host])

# i = 100
for message in consumer:
    _id = getIDfromJSON(message.value)
    es.create(index="twitter", id=_id, body=message.value, doc_type="tweets")
    print(f'ID {_id} has been sent to elasticsearch')

from flask import Flask
from redis import Redis, RedisError
from kafka import KafkaConsumer
import os
import socket

KAFKA_HOSTS = ['localhost:9092']
KAFKA_VERSION = (0, 10)

# To consume latest messages and auto-commit offsets
consumer = KafkaConsumer('fooddelivery',
                         group_id='',
                         bootstrap_servers=KAFKA_HOSTS)
# bootstrap_servers=KAFKA_HOSTS, api_version=KAFKA_VERSION)

for message in consumer:
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))

    os.getenv('ES_HOST'),
    os.getenv('ES_PORT'),
    use_ssl=os.getenv('ES_USE_SSL', False),
    verify_certs=os.getenv('ES_VERIFY_CERTS', False),
    http_auth=(os.getenv('ES_USER'), os.getenv('ES_PASSWORD')) if os.getenv('ES_USER') else None,
    ca_certs=os.getenv('ES_CA_CERTS', None))

geo_point_mapping = es.define_geo_point_mapping()
es.create_index(ELASTICSEARCH_INDEX, geo_point_mapping)

kafka_consumer = KafkaConsumer(
    KAFKA_TOPIC,
    bootstrap_servers=[
        "{}:{}".format(os.getenv('KAFKA_HOST'), os.getenv('KAFKA_PORT'))
    ],
    # auto_offset_reset='earliest',
    security_protocol=os.getenv('KAFKA_SECURITY_PROTOCOL', 'PLAINTEXT'),
    ssl_cafile=os.getenv('KAFKA_CA_FILE', None),
    ssl_certfile=os.getenv('KAFKA_CERT_FILE', None),
    ssl_keyfile=os.getenv('KAFKA_KEY_FILE', None),
    group_id='group_' + KAFKA_TOPIC,
    value_deserializer=lambda m: json.loads(m.decode('utf8')))

c = 0
for msg in kafka_consumer:
    c += 1
    print("Consumed: {} messages".format(c))
    # data are already processed in the appropriate way from producer's DataFrame,
    # so just insert them to DB
    print(es.insert_doc(msg.value))

from kafka import KafkaConsumer
from kafka import TopicPartition
import json

if __name__ == '__main__':
    consumer = KafkaConsumer('registered_user',
                             bootstrap_servers=['192.168.1.2:9092'],
                             auto_offset_reset='earliest',
                             enable_auto_commit=True,
                             group_id='consumer-group-a')
    print("Connected:", consumer.bootstrap_connected())
    print("Subscription:", consumer.subscription())
    print("starting the consumer...")
    for msg in consumer:
        print("Registered User: {}".format(json.loads(msg.value)))

import io

import avro.schema
import avro.io
from struct import *
from kafka import KafkaConsumer

topic = 'debug'

if __name__ == "__main__":
    # avro schema path
    schema_path = '../schema/d_hb.avsc'
    # load avro schema
    schema = avro.schema.parse(open(schema_path).read())

    consumer = KafkaConsumer(topic, group_id='debugtest')
    for message in consumer:
        # disregard any message that does not have a heartbeat key
        # (keys arrive as raw bytes, so decode before splitting)
        key_splited = message.key.decode('utf-8').split(':')
        if key_splited[0] != 'hb':
            continue
        isoblue_id = key_splited[1]

        # setup avro decoder
        bytes_reader = io.BytesIO(message.value)
        decoder = avro.io.BinaryDecoder(bytes_reader)
        reader = avro.io.DatumReader(schema)
        hb_datum = reader.read(decoder)

#!/usr/bin/python
# -*- coding: utf-8 -*-
# python version 2.7.6
# The consumer must be started first

from kafka import KafkaConsumer

# To consume messages
consumer = KafkaConsumer('my-topic',
                         group_id='my_group',
                         bootstrap_servers=['localhost:9092'])
for message in consumer:
    # message value is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/6/4 6:14 PM
# @Author  : xiaowei
# @Site    :
# @File    : test.py
# @Software: PyCharm
from kafka import KafkaConsumer
from kafka.structs import TopicPartition

consumer = KafkaConsumer('index-vehicle', bootstrap_servers=['192.168.6.27:9092'])

print(consumer.partitions_for_topic('index-vehicle'))
# print(consumer.beginning_offsets(consumer.assignment()))
print(consumer.topics())

from kafka import KafkaConsumer
import logging
import json

logging.basicConfig(level=logging.INFO)

# properties for consumer
bootstrap_server: str = 'localhost:9092'
topic_read: str = 'third_topic'

# create consumer
consumer = KafkaConsumer(bootstrap_servers=bootstrap_server,
                         group_id='my-group',
                         auto_offset_reset='earliest')

# read data from topic
consumer.subscribe(topic_read)

try:
    for message in consumer:
        logging.info(message.value)
except KeyboardInterrupt:
    '''this avoids an annoying KeyboardInterrupt traceback in the terminal'''
    logging.error('Consumer closed')

        print('... slept')
    except Exception as e:
        print(e)


KAFKA_BROKERS = 'wielder-kafka.kafka.svc.cluster.local:9092'
KAFKA_TOPIC = 'demo'
GROUP_ID = 'pep2'

print(
    f'KAFKA_BROKERS: {KAFKA_BROKERS}\n Topic {KAFKA_TOPIC}\n group id: {GROUP_ID}'
)

consumer = KafkaConsumer(KAFKA_TOPIC,
                         bootstrap_servers=KAFKA_BROKERS,
                         group_id=GROUP_ID,
                         enable_auto_commit=False,
                         max_poll_records=1)

print(f'bootstrap_servers: {KAFKA_BROKERS} subscribing to {KAFKA_TOPIC}')
consumer.subscribe([KAFKA_TOPIC])

for message in consumer:
    print(f"message is of type: {type(message)}")
    print(message)

    # do_something_time_consuming()
    _cmd = f"perl ./pep.pl"
    cmd_for_return_code(_cmd, msg=message.value)

    meta = consumer.partitions_for_topic(message.topic)
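    # Hedged continuation (the fragment ends here): auto-commit is disabled above,
    # so an explicit commit presumably follows once the record has been processed.
    consumer.commit()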