Example no. 1
from utils import insert_data
from pymongo import MongoClient
from kafka import KafkaConsumer

# MongoDB =====
## Connect
cliente = MongoClient('mongodb://*****:*****@localhost:27017/')

## Select a database
banco = cliente.stagioptr

## Collection
album = banco.feeding

# Kafka ======
consumer = KafkaConsumer('feeding')

print('Starting Feeding Consumer!')

for message in consumer:
    insert_data(message, album, topic='feeding')
Example no. 2
from bson.json_util import dumps, loads
from xmlrpc.client import ServerProxy
from kafka import KafkaConsumer
from time import sleep

sleep(20)

consumer = KafkaConsumer(
    bootstrap_servers=['localhost:9092'],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='jeff-group',
    value_deserializer=lambda x: loads(x.decode('utf-8')))

consumer.subscribe(['rocketTopic', 'rocketSTopic'])

for msg in consumer:
    message = msg.value
    topic_retrieve = msg.topic

    if message['action'] == "running" and topic_retrieve == "rocketTopic":
        print(message['rocketName'] + " FIRST STAGE || " + " at position " + message['state'])
    elif message['action'] == "destroy" and topic_retrieve == "rocketTopic":
        print(message['msg'])
    elif message['action'] == "running" and topic_retrieve == "rocketSTopic":
        print(message['rocketName'] + " SECOND STAGE || " + " at position " + message['state'])
Example no. 3
from kafka import KafkaConsumer
import json
import time
from elasticsearch import Elasticsearch

# Wait for 15 seconds to make sure Kafka is up and running
time.sleep(15)
while True:
    consumer = KafkaConsumer('new-listings-topic',
                             group_id='listing-indexer',
                             bootstrap_servers=['kafka:9092'])
    es = Elasticsearch(['es'])
    for message in consumer:
        # take a message from the queue
        new_listing = json.loads((message.value).decode('utf-8'))
        # push to ES
        es.index(index='listing_index',
                 doc_type='listing',
                 id=json.loads(new_listing)['id'],
                 body=new_listing)
        es.indices.refresh(index="listing_index")
Example no. 4
# -*- coding:utf-8 -*-

from kafka import KafkaConsumer
from config.kafka_config import kafka_config

from cassandra.cluster import Cluster
import json

kafkaConfig = kafka_config()

consumer = KafkaConsumer(
    bootstrap_servers=kafkaConfig.bootstrap_servers,
    auto_offset_reset='earliest',
    consumer_timeout_ms=1000,
    value_deserializer=lambda m: json.loads(m.decode('utf-8')))

consumer.subscribe([kafkaConfig.topic])

cluster = Cluster()  # Initialize Cassandra
session = cluster.connect("depot_task_statistics")

for msg in consumer:
    consumeValue = json.loads(msg.value)

    upd_query = "UPDATE daily_task_status_count " \
                "SET task_count=task_count+1 " \
                "WHERE " \
                "task_date='{0}' " \
                "AND depot='{1}' " \
                "AND task_status='{2}'"\
        .format(consumeValue['trackedAt'][0:10], consumeValue['depot'], consumeValue['task_status'])
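
The snippet builds the UPDATE statement but is truncated before it runs; a minimal, hedged completion reusing the session opened above (a parameterized query would be safer than the string formatting, but this mirrors the snippet as written):

    # Hypothetical completion: apply the counter update for this message.
    session.execute(upd_query)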
Example no. 5
from kafka import KafkaConsumer
import json
import watson
import config as cfg
import requests
from bs4 import BeautifulSoup
topic_name = 'nowPlaying'

consumer = KafkaConsumer(
    topic_name,
    bootstrap_servers=cfg.es_brokers,
    security_protocol='SASL_SSL',
    sasl_mechanism='PLAIN',
    sasl_plain_username='******',
    auto_offset_reset='latest',
    enable_auto_commit=True,
    auto_commit_interval_ms=5000,
    fetch_max_bytes=128,
    max_poll_records=100,
    sasl_plain_password=cfg.es_apikey,
    value_deserializer=lambda x: json.loads(x.decode('utf-8')))
lista = []
for message in consumer:
    tweets = message.value  # already deserialized to a dict by value_deserializer
    # print(json.dumps(tweets, indent=2))

    for j in tweets['entities']['urls']:
        if "open.spotify.com/track" in j['expanded_url']:
            # watson.analyze(tweets['text'])
            page = requests.get(j['expanded_url'])
            soup = BeautifulSoup(page.content, 'html.parser')
Example no. 6
from kafka import KafkaConsumer
from json import loads

f = open('/home/fieldemployee/Kafka_Shakespeare.txt', 'w')
consumer = KafkaConsumer(
    'bigdata',
    bootstrap_servers=['localhost:9096', 'localhost:9097', 'localhost:9098'],
    auto_offset_reset='earliest')

for message in consumer:
    message = message.value
    f.write(message.decode("utf-8"))

f.close()
Example no. 7
    # kafka
    parser.add_argument('topic_name')
    parser.add_argument('kafka_broker')
    # hbase
    parser.add_argument('data_table')
    parser.add_argument('hbase_host')

    # Parse arguments.
    args = parser.parse_args()
    topic_name = args.topic_name
    kafka_broker = args.kafka_broker
    data_table = args.data_table
    hbase_host = args.hbase_host

    # Initiate a simple kafka consumer.
    kafka_consumer = KafkaConsumer(topic_name, bootstrap_servers=kafka_broker)

    # Initiate a hbase connection.
    hbase_connection = happybase.Connection(hbase_host)

    # Create table if not exists.
    hbase_tables = [table.decode() for table in hbase_connection.tables()]
    # TODO indexing
    if data_table not in hbase_tables:
        hbase_connection.create_table(data_table, {'family': dict()})

    # Setup proper shutdown hook.
    atexit.register(shutdown_hook, kafka_consumer, hbase_connection)

    # Start consuming kafka and writing to hbase.
    # get content and exclude headers etc.
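
The original file is cut off right after these comments (parser, shutdown_hook and the enclosing main block are defined elsewhere in it); a hedged sketch of the missing consume-and-write loop using the happybase table created above:

    # Hypothetical completion: store each message value in the HBase table,
    # keyed by topic/partition/offset so rows stay unique.
    hbase_table = hbase_connection.table(data_table)
    for message in kafka_consumer:
        row_key = '%s-%d-%d' % (message.topic, message.partition, message.offset)
        hbase_table.put(row_key, {'family:payload': message.value})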
Example no. 8
    def kafka_consumer(self, **configs):
        brokers = '%s:%d' % (self.server.host, self.server.port)
        consumer = KafkaConsumer(self.topic,
                                 bootstrap_servers=brokers,
                                 **configs)
        return consumer
Example no. 9
# Need kafka-python package
from time import sleep
from kafka import KafkaAdminClient
from kafka import KafkaConsumer
from kafka.admin import NewPartitions
import re

bootstrap_servers = "kafka-customers-c-1-v1.corp.itcd.ru"
topic_pattern = ".*CDP.Scenarios.Runtime"
need_partitions = 4
sleep_after = 20
sleeping_time = 20

admin_client = KafkaAdminClient(bootstrap_servers=bootstrap_servers)

consumer = KafkaConsumer(group_id='test', bootstrap_servers=bootstrap_servers)

topics = consumer.topics()
cdp_topics = [topic for topic in topics if re.match(topic_pattern, topic)]
topics_amount = len(cdp_topics)
counter = 0

for topic in cdp_topics:
    partitions = len(consumer.partitions_for_topic(topic))
    if partitions < need_partitions:
        print(
            f'Increase number of partitions in topic {topic} to {need_partitions}'
        )
        counter += 1
        topic_partitions = {}
        topic_partitions[topic] = NewPartitions(total_count=need_partitions)
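
The loop is truncated before the new partition count is applied; a hedged completion using the admin client and the sleep settings defined above (which the visible snippet never uses):

        # Hypothetical completion: request the extra partitions and back off
        # every `sleep_after` topics to avoid hammering the cluster.
        admin_client.create_partitions(topic_partitions)
        if counter % sleep_after == 0:
            sleep(sleeping_time)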
Example no. 10
        # Slide fetching can fail, so fetch slides until success
        slide_fetched = False
        while not slide_fetched:
            try:
                slide_fetched = fetch_slide()
            except Exception as e:
                print(f"Unexpected error when fetching a slide. {str(e)}")


if __name__ == '__main__':
    print('Spider starting..')

    consumer = KafkaConsumer(
        TOPIC_NAME,
        auto_offset_reset='latest',
        bootstrap_servers=BOOTSTRAP_SERVERS,
        api_version=(0, 10),
        consumer_timeout_ms=1000,
        enable_auto_commit=True,
        value_deserializer=lambda x: json.loads(x.decode('utf-8')))

    consumer.subscribe([TOPIC_NAME])  # Poll messages from the topic

    try:
        while True:
            # Response format is {TopicPartition('topic1', 1): [msg1, msg2]}
            msg_pack = consumer.poll(
                timeout_ms=1000,  # Wait for 1s when no data in buffer
                max_records=1)  # Poll maximum 1 record at a time

            for tp, messages in msg_pack.items():
                for message in messages:
Example no. 11
    except Exception as ex:
        print('Exception while parsing')
        print(str(ex))
    finally:
        return json.dumps(rec)


if __name__ == '__main__':
    print('Running Consumer..')
    parsed_records = []
    topic_name = 'raw_recipes'
    parsed_topic_name = 'parsed_recipes'
    try:
        consumer = KafkaConsumer(topic_name,
                                 auto_offset_reset='earliest',
                                 bootstrap_servers=['192.168.30.94:9092'],
                                 api_version=(0, 10, 2, 0),
                                 consumer_timeout_ms=1000)
    except Exception as ex:
        print("Something went wrong:", ex)
    for msg in consumer:
        html = msg.value
        result = parse(html)
        parsed_records.append(result)
    consumer.close()
    sleep(5)

    if len(parsed_records) > 0:
        print('Publishing records..')
        producer = connect_kafka_producer()
        for rec in parsed_records:
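
The publishing loop is cut off here; a hedged completion, assuming connect_kafka_producer() returns a plain KafkaProducer without a value_serializer (each rec is already a JSON string from parse()):

            # Hypothetical completion: forward every parsed recipe to the
            # parsed_recipes topic and flush before exiting.
            producer.send(parsed_topic_name, value=rec.encode('utf-8'))
        producer.flush()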
Example no. 12
import os
import json
import time
import pymongo
from kafka import KafkaConsumer
from pymongo import MongoClient

kafka_topic = "topic1"

if __name__ == '__main__':

    client = MongoClient("mongodb://192.168.99.100:27017")
    db = client.dbtwitter

    time.sleep(10)  # wait until Kafka is running
    kafka_service = os.environ['KAFKA_SERVICE']
    print("Consumer is using kafka service {0}".format(kafka_service))

    consumer = KafkaConsumer(bootstrap_servers=[kafka_service],
                             api_version=(0, 10))
    consumer.subscribe(kafka_topic)
    for msg in consumer:
        result = db.tbcanada.insert_one(json.loads(msg.value.decode("utf-8")))
        #print (msg.value.decode("utf-8"))
Example no. 13
from kafka import KafkaProducer, KafkaConsumer, TopicPartition
import argparse
import json
import os
import io

print "Beginning reading mock_twitter_stream messages into spark_input"

kafka_topic = 'mock_twitter_stream'
number = 9974

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
producer = KafkaProducer(bootstrap_servers='localhost:9092', value_serializer=lambda v: json.dumps(v).encode('utf-8'))

partitions = consumer.partitions_for_topic(kafka_topic)
topic_partitions = list()
for partition in partitions:
    topic_partitions.append(TopicPartition(kafka_topic, partition))

consumer.assign(topic_partitions)
consumer.seek_to_end()

topic_partition_to_offset = dict()
for topic_partition in topic_partitions:
    next_offset = consumer.position(topic_partition)
    reduced_offset = max(next_offset - number, 0)
    topic_partition_to_offset[topic_partition] = reduced_offset

for topic_partition, offset in topic_partition_to_offset.items():
    consumer.seek(topic_partition, offset)
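
The snippet ends after rewinding the offsets; a hedged sketch of the replay loop implied by the banner message, assuming the topic already holds messages and that spark_input is the destination topic:

end_offsets = consumer.end_offsets(topic_partitions)
for message in consumer:
    # Re-publish each raw record (bytes of JSON) through the JSON-serializing producer.
    producer.send('spark_input', json.loads(message.value))
    if all(consumer.position(tp) >= end_offsets[tp] for tp in topic_partitions):
        break
producer.flush()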
Example no. 14
import sys

from controller import commLogController
from pytz import timezone
from datetime import datetime
from kafka import KafkaConsumer
from json import loads

broker_address = "103.56.148.215"  #"161.117.58.227"
port = "9092"
group_id = 'SEMAR-IoT-Platform'
topic_list = {}
main_folder = 'data'
topic_active = ['kafka-service-subscribe', 'kafka-service-unsubscribe']

consumer = KafkaConsumer(bootstrap_servers=[broker_address + ':' + port],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         group_id=group_id,
                         value_deserializer=lambda x: loads(x.decode('utf-8')))
consumer.subscribe(topic_active)
print(topic_active)
sys.stdout.flush()


def subscribe_list():
    query = {"active": True, "channel_type": "kafka"}
    result = comChannelController.find(query)
    for val in result['data']:
        topic_list[val['topic']] = val['channel_code']
        consumer.subscribe([val['topic']])
        print("Subscribe Topic: " + val['topic'])
        sys.stdout.flush()
Example no. 15
import json

from kafka import KafkaConsumer

from code.servers import bootstrap_servers
from code.topics import TOPIC_1

consumer = KafkaConsumer(
    value_deserializer=lambda m: json.loads(m),
    bootstrap_servers=bootstrap_servers,
    group_id='my-group',
)
consumer.subscribe([TOPIC_1])

for message in consumer:
    print(
        f'{message.topic}:{message.partition}:{message.offset} key={message.key} value={message.value}'
    )
Example no. 16
    }
    timestamp = datetime.utcfromtimestamp(
        msg_dict['Timestamp'])  # assume timestamp is UTC
    msg_dict['MessageDate'] = timestamp.isoformat()
    if msg_dict['Position.satellites'] < 3:
        msg_dict["Position.lon"] = -1
        msg_dict["Position.lat"] = -1
    if msg_dict['Position.altitude'] < 0:
        msg_dict['Position.altitude'] = 0
    return msg_dict


if __name__ == '__main__':
    topic = 'VinVehicle'
    consumer = KafkaConsumer(topic,
                             bootstrap_servers='localhost:9092',
                             value_deserializer=lambda x: x.decode('utf-8'),
                             auto_offset_reset='earliest')
    prod = KafkaProducer(
        bootstrap_servers='localhost:9092',
        value_serializer=lambda x: json.dumps(x).encode('utf-8'))
    count = 1
    for msg in consumer:
        print(f"retrieved message {count} from topic {topic}")
        dict_msg = process_msg(msg.value)
        realtime_msg = {
            "VinVehicle": dict_msg["VinVehicle"],
            "Position.lon": dict_msg["Position.lon"],
            "Position.lat": dict_msg["Position.lat"],
            "Position.altitude": dict_msg["Position.altitude"],
            "Position.heading": dict_msg["Position.heading"],
            "Position.speed": dict_msg["Position.speed"],
Example no. 17
def startApplication():
    logger.info("starting isbn-lookup-service consumer")
    consumer = KafkaConsumer(KAFKA_TOPIC,
                             group_id=KAFKA_GROUP_ID,
                             max_poll_records=KAFKA_MAX_POLL_RECORDS,
                             bootstrap_servers=[KAFKA_BROKERS],
                             api_version=(1, 1, 0))
Example no. 18
It waits for new purchases.
It reviews them and publishes the review.
"""
import random
import time
from kafka import KafkaConsumer, KafkaProducer
from kafka.errors import KafkaError

BOTTLE_PURCHASE_TOPIC = "bottle-purchase"
BOTTLE_REVIEW_TOPIC = "bottle-review"

consumer = KafkaConsumer(
    BOTTLE_PURCHASE_TOPIC,
    bootstrap_servers=["localhost:9092"],
    # auto_offset_reset="earliest",
    # api_version=(0, 10),
    # consumer_timeout_ms=1000,
)
producer = KafkaProducer(bootstrap_servers=["localhost:9092"])


def main():
    # Consuming bottle-purchase.
    for message in consumer:
        key = message.key
        if key:
            key = key.decode("utf-8")
        value = message.value
        if value:
            value = value.decode("utf-8")
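
The review step promised in the module docstring is missing from the snippet; a hedged completion that publishes a random verdict to the review topic, using only the imports and producer defined above:

        # Hypothetical completion: simulate reviewing the purchase and publish
        # the verdict to the bottle-review topic.
        time.sleep(random.uniform(0.5, 2.0))
        review = random.choice([b"approved", b"rejected"])
        try:
            producer.send(BOTTLE_REVIEW_TOPIC, key=message.key, value=review)
            producer.flush()
        except KafkaError as error:
            print(f"Failed to publish review: {error}")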
Example no. 19
from kafka import KafkaConsumer
from pymongo import MongoClient
from json import loads

KAFKA_VERSION = (0, 10)

consumer = KafkaConsumer(
    'numtest',
    bootstrap_servers=['localhost:9092'],
    # Controls where the consumer resumes reading after a restart: 'latest'
    # starts at the end of the log, 'earliest' starts at the oldest available
    # offset when no committed offset exists for the group.
    auto_offset_reset='earliest',
    # Commit the read offset automatically on every auto-commit interval.
    enable_auto_commit=True,
    # auto_commit_interval_ms=1000,  # interval between two commits, in milliseconds;
    # with a message arriving every five seconds, committing every second is fine.
    group_id='my-group',
    api_version=KAFKA_VERSION,
    value_deserializer=lambda x: loads(x.decode('utf-8')))

client = MongoClient('localhost:27017')
collection = client.numtest.numtest

for message in consumer:
    message = message.value
    collection.insert_one(message)
    print('{} added to {}'.format(message, collection))
Example no. 20
from kafka import KafkaConsumer
import json
import pymongo
from json import loads, dumps

consumer = KafkaConsumer('pokemon',
                         bootstrap_servers=['localhost:29092'],
                         enable_auto_commit=True,
                         auto_commit_interval_ms=5000,
                         value_deserializer=lambda x: loads(x.decode('utf-8')),
                         auto_offset_reset='earliest')

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["pokemons"]
mycollect = mydb["pokemon"]

for msg in consumer:
    pokemon_name = msg.value['forms'][0]['name']
    print(pokemon_name)
    mycollect.insert_one(msg.value)
    print('Inserted into MongoDB!')
Example no. 21
    region = "wgie"
else:
    region = mirrors[-1].split("-")[0]
topic ='wgdp-kafka-production-'+region
try:
    producer = KafkaProducer(bootstrap_servers=mirrors)
    message = bytes(str(int(time.time())).encode("utf-8"))
    producer.send(topic, message)
except Exception:
    current_time = datetime.datetime.now().isoformat()
    hostname = os.uname()[1].split(".")[0]
    logging.critical(
        "{} {} No available brokers to check".format(current_time, hostname)
    )

consumer = KafkaConsumer(topic, group_id='mmchecker', bootstrap_servers=centers, consumer_timeout_ms=15000, auto_commit_interval_ms=4000)
if not os.path.exists(lagfile):
    with open(lagfile, "w") as init_file:
        init_file.write("0")
f = open(lagfile, "r+")
prelag = int(f.read())
messages = []
for msg in consumer:
    messages.append(msg.value)
messages.sort()
if len(messages) > 0:
    c_time = int(time.time())
    delay = int((c_time - int(messages[-1])) / 60 / 5)
else:
    delay = prelag + 1
f.seek(0)
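
The fragment stops at f.seek(0); a hedged completion that persists the newly computed lag for the next run:

# Hypothetical completion: overwrite the stored lag value and close the file.
f.write(str(delay))
f.truncate()
f.close()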
Example no. 22
from kafka import KafkaConsumer
from setup import topicName
from elasticsearch import Elasticsearch
from secrets import main_host, username, password, host
import json

# To consume latest messages and auto-commit offsets
consumer = KafkaConsumer(topicName,
                         group_id='kafka_elastic_search_v1',
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='earliest')

if not consumer.bootstrap_connected():
    raise Exception("You aren't connected to the Kafka server")


def getIDfromJSON(message):
    message = message.decode()
    message = json.loads(message)
    _id = int(message['id_str'])
    return _id


es = Elasticsearch(hosts=[host])

# i = 100
for message in consumer:
    _id = getIDfromJSON(message.value)
    es.create(index="twitter", id=_id, body=message.value, doc_type="tweets")
    print(f'ID {_id} has been sent to elasticsearch')
Example no. 23
from flask import Flask
from redis import Redis, RedisError
from kafka import KafkaConsumer
import os
import socket

KAFKA_HOSTS = ['localhost:9092']
KAFKA_VERSION = (0, 10)

# To consume latest messages and auto-commit offsets
consumer = KafkaConsumer('fooddelivery',
                         group_id='',
                         bootstrap_servers=KAFKA_HOSTS)
#                         bootstrap_servers=KAFKA_HOSTS, api_version=KAFKA_VERSION)
for message in consumer:
    print("%s:%d:%d: key=%s value=%s" %
          (message.topic, message.partition, message.offset, message.key,
           message.value))
Example no. 24
    os.getenv('ES_HOST'),
    os.getenv('ES_PORT'),
    use_ssl=os.getenv('ES_USE_SSL', False),
    verify_certs=os.getenv('ES_VERIFY_CERTS', False),
    http_auth=(os.getenv('ES_USER'),
               os.getenv('ES_PASSWORD')) if os.getenv('ES_USER') else None,
    ca_certs=os.getenv('ES_CA_CERTS', None))

geo_point_mapping = es.define_geo_point_mapping()

es.create_index(ELASTICSEARCH_INDEX, geo_point_mapping)

kafka_consumer = KafkaConsumer(
    KAFKA_TOPIC,
    bootstrap_servers=[
        "{}:{}".format(os.getenv('KAFKA_HOST'), os.getenv('KAFKA_PORT'))
    ],
    # auto_offset_reset='earliest',
    security_protocol=os.getenv('KAFKA_SECURITY_PROTOCOL', 'PLAINTEXT'),
    ssl_cafile=os.getenv('KAFKA_CA_FILE', None),
    ssl_certfile=os.getenv('KAFKA_CERT_FILE', None),
    ssl_keyfile=os.getenv('KAFKA_KEY_FILE', None),
    group_id='group_' + KAFKA_TOPIC,
    value_deserializer=lambda m: json.loads(m.decode('utf8')))
c = 0
for msg in kafka_consumer:
    c += 1
    print("Consumed: {} messages".format(c))
    # data are already processed in the appropriate way from producer's DataFrame, so just insert them to DB
    print(es.insert_doc(msg.value))
Example no. 25
from kafka import KafkaConsumer
from kafka import TopicPartition
import json

if __name__ == '__main__':
    consumer = KafkaConsumer('registered_user',
                             bootstrap_servers=['192.168.1.2:9092'],
                             auto_offset_reset='earliest',
                             enable_auto_commit=True,
                             group_id='consumer-group-a')

    print("Connected:", consumer.bootstrap_connected())
    print("Subscription:", consumer.subscription())
    print("starting the consumer...")

    for msg in consumer:
        print("Registered User: {}".format(json.loads(msg.value)))
Example no. 26
import io

import avro.schema
import avro.io

from struct import *
from kafka import KafkaConsumer

topic = 'debug'

if __name__ == "__main__":
    # avro schema path
    schema_path = '../schema/d_hb.avsc'

    # load avro schema
    schema = avro.schema.parse(open(schema_path).read())

    consumer = KafkaConsumer(topic, group_id='debugtest')

    for message in consumer:
        # disregard any message that does not have heartbeat key
        key_splited = message.key.split(':')
        if key_splited[0] != 'hb':
            continue

        isoblue_id = key_splited[1]

        # setup avro decoder
        bytes_reader = io.BytesIO(message.value)
        decoder = avro.io.BinaryDecoder(bytes_reader)
        reader = avro.io.DatumReader(schema)
        hb_datum = reader.read(decoder)
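
The snippet stops right after decoding; a minimal, hedged continuation would simply surface the decoded heartbeat:

        # Hypothetical continuation: use the decoded record, e.g. print it
        # alongside the ISOBlue id taken from the message key.
        print(isoblue_id, hb_datum)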
Example no. 27
#!/usr/bin/python
# -*- coding: utf-8 -*-
# python version 2.7.6

# The consumer must be started first

from kafka import KafkaConsumer

# To consume messages
consumer = KafkaConsumer('my-topic',
                         group_id='my_group',
                         bootstrap_servers=['localhost:9092'])
for message in consumer:
    # message value is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s:%d:%d: key=%s value=%s" %
          (message.topic, message.partition, message.offset, message.key,
           message.value))
Example no. 28
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/6/4 6:14 PM
# @Author : xiaowei
# @Site :
# @File : test.py
# @Software: PyCharm
from kafka import KafkaConsumer
from kafka.structs import TopicPartition

consumer = KafkaConsumer('index-vehicle',
                         bootstrap_servers=['192.168.6.27:9092'])

print(consumer.partitions_for_topic('index-vehicle'))
#print(consumer.beginning_offsets(consumer.assignment()))
print(consumer.topics())
Example no. 29
from kafka import KafkaConsumer
import logging
import json

logging.basicConfig(level=logging.INFO)

# properties for consumer
bootstrap_server: str = 'localhost:9092'
topic_read: str = 'third_topic'

# create consumer

consumer = KafkaConsumer(bootstrap_servers=bootstrap_server,
                         group_id='my-group',
                         auto_offset_reset='earliest')

# read data from topic

consumer.subscribe([topic_read])

try:
    for message in consumer:
        logging.info(message.value)
except KeyboardInterrupt:
    # Catch Ctrl-C so the terminal is not cluttered with a KeyboardInterrupt traceback.
    logging.error('Consumer closed')
Example no. 30
        print('... slept')
    except Exception as e:
        print(e)


KAFKA_BROKERS = 'wielder-kafka.kafka.svc.cluster.local:9092'
KAFKA_TOPIC = 'demo'
GROUP_ID = 'pep2'

print(
    f'KAFKA_BROKERS: {KAFKA_BROKERS}\n Topic {KAFKA_TOPIC}\n group id: {GROUP_ID}'
)

consumer = KafkaConsumer(KAFKA_TOPIC,
                         bootstrap_servers=KAFKA_BROKERS,
                         group_id=GROUP_ID,
                         enable_auto_commit=False,
                         max_poll_records=1)

print(f'bootstrap_servers: {KAFKA_BROKERS} subscribing to {KAFKA_TOPIC}')
consumer.subscribe([KAFKA_TOPIC])

for message in consumer:
    print(f"message is of type: {type(message)}")
    print(message)

    # do_something_time_consuming()
    _cmd = f"perl ./pep.pl"
    cmd_for_return_code(_cmd, msg=message.value)

    meta = consumer.partitions_for_topic(message.topic)
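
With enable_auto_commit=False the offsets are never committed in the visible snippet; a minimal, hedged completion inside the loop:

    # Hypothetical completion: commit the offset of the record that was just
    # processed before polling the next one (max_poll_records=1 above).
    consumer.commit()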