def invoke_kafka_consumer(self, p_str_broker_host, p_is_sasl):
        if p_is_sasl:
            # consumer = Consumer({
            #     'bootstrap.servers': config.BOOTSTRAP_SERVERS,
            #     'group.id': config.CONSUMER_GROUP,
            #     'enable.auto.commit': False,
            # })

            return KafkaConsumer(bootstrap_servers=p_str_broker_host,
                                 security_protocol='SASL_PLAINTEXT',
                                 sasl_mechanism='PLAIN',
                                 sasl_plain_username='******',
                                 sasl_plain_password='******')
        else:
            return KafkaConsumer(bootstrap_servers=p_str_broker_host)
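
For comparison, the commented-out dictionary near the top of this example refers to confluent_kafka's Consumer; a minimal sketch of the same SASL branch with that client might look like the following (the config keys are standard librdkafka properties, and the masked credentials and group id are placeholders, not values from the original):

from confluent_kafka import Consumer

def build_sasl_consumer_confluent(broker_host):
    # Sketch only: SASL_PLAINTEXT / PLAIN expressed as a librdkafka-style config dict.
    return Consumer({
        'bootstrap.servers': broker_host,
        'security.protocol': 'SASL_PLAINTEXT',
        'sasl.mechanism': 'PLAIN',
        'sasl.username': '******',      # placeholder credential
        'sasl.password': '******',      # placeholder credential
        'group.id': 'example-group',    # hypothetical group id (required by confluent_kafka)
        'enable.auto.commit': False,
    })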
Example No. 2
 def read_groups(self):
     self.log.info("Kafka consumer running")
     self.consumer = KafkaConsumer(
         CONSUMER_OFFSET_TOPIC,
         group_id='offset_monitoring_consumer',
         bootstrap_servers=self.kafka_config.broker_list,
         auto_offset_reset='smallest',
         auto_commit_enable=False,
         consumer_timeout_ms=10000,
     )
     self.log.info("Consumer ready")
     self.watermarks = self.get_current_watermarks()
     while not self.finished():
         try:
             message = self.consumer.next()
             max_offset = self.get_max_offset(message.partition)
             if message.offset >= max_offset - 1:
                 self.finished_partitions.add(message.partition)
         except ConsumerTimeout:
             break
         except (
                 FailedPayloadsError,
                 KafkaUnavailableError,
                 LeaderNotAvailableError,
                 NotLeaderForPartitionError,
         ) as e:
             self.log.warning("Got %s, retrying", e.__class__.__name__)
             continue  # the fetch failed; skip processing so a stale or unbound message is not used
         self.process_consumer_offset_message(message)
     return self.kafka_groups
Example No. 3
def create_consumer(topics, brokers, group,
                    max_bytes=1024 * 1024, max_wait_ms=100):
    kafka = KafkaConsumer(*topics, metadata_broker_list=brokers,
                          group_id=group,
                          fetch_message_max_bytes=max_bytes,
                          fetch_wait_max_ms=max_wait_ms)
    return kafka
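
This snippet uses the keyword names of the legacy kafka-python API (metadata_broker_list, fetch_message_max_bytes, fetch_wait_max_ms). A rough sketch of the same consumer with the current kafka-python keyword arguments, assuming the mapping below reflects the original intent:

def create_consumer_current(topics, brokers, group,
                            max_bytes=1024 * 1024, max_wait_ms=100):
    # Same idea with the modern keyword names: bootstrap_servers replaces
    # metadata_broker_list, max_partition_fetch_bytes caps the bytes fetched per
    # partition, and fetch_max_wait_ms bounds how long the broker may wait for data.
    return KafkaConsumer(*topics,
                         bootstrap_servers=brokers,
                         group_id=group,
                         max_partition_fetch_bytes=max_bytes,
                         fetch_max_wait_ms=max_wait_ms)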
Example No. 4
 def configure_internal_queues(self):
     """
     configures the internal queues used to hold references to events in the input queue
     """
     for i in range(self.number_of_queues):
         client = KafkaClient(hosts=self.kafka_hosts)
         queue_name = SCHEDULER_QUEUE_FORMAT.format(2**i)
         client.ensure_topic_exists(queue_name)
         indexed_consumer = IndexedConsumer(self.input_topic,
                                            self.kafka_hosts)
         queue_consumer = KafkaConsumer(
             queue_name,
             bootstrap_servers=self.kafka_hosts,
             group_id=queue_name,
             consumer_timeout_ms=2000,
             auto_commit_enable=False,
         )
         queue_producer = SimpleProducer(client)
         queue_duration = 2**i
         self.queues.append(
             InternalQueue(
                 queue_consumer,
                 indexed_consumer,
                 queue_producer,
                 self.number_of_queues,
                 queue_duration,
             ))
Example No. 5
def kafka_consumer_entrypoint():
    global g_config_kafka
    print("Kafka config: " + str(g_config_kafka))
    consumer = KafkaConsumer(
        g_config_kafka['topic_name'],
        group_id=g_config_kafka['group_id'],
        bootstrap_servers=[g_config_kafka['bootstrap_server']],
        security_protocol="SSL",
        ssl_cafile=g_config_kafka['ssl_cafile'],
        ssl_keyfile=g_config_kafka['ssl_keyfile'],
        ssl_certfile=g_config_kafka['ssl_certfile'])

    for message in consumer:
        # Extract message.value:
        try:
            value = json.loads(message.value.decode('utf-8'))
            # Message is a json dictionary of form:
            # { site_id: ..., status_code: ..., regex_results: ...}
            # print(";; debug: " + str(value))
            db_store_probe_results(value['site_id'], value['status_code'],
                                   value['regex_results'])
        except:
            print("Unable to parse message from the kafka topic.")
            raise
    return True
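
For reference, a producer-side sketch (an assumption, not part of the original code) of how a message matching the dictionary shape described in the comment above could be published:

from kafka import KafkaProducer
import json

producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],  # hypothetical broker
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))
producer.send('example-topic',  # hypothetical topic name
              {'site_id': 1, 'status_code': 200, 'regex_results': []})
producer.flush()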
Example No. 6
def get_kafka_consumer(topic: str, consumer_group: str) -> KafkaConsumer:
    return KafkaConsumer(topic,
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='latest',
                         enable_auto_commit=True,
                         group_id=consumer_group,
                         value_deserializer=lambda x: loads(x.decode('utf-8')))
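
A minimal usage sketch for the helper above; the topic and group names are hypothetical:

consumer = get_kafka_consumer('example-topic', 'example-group')
for record in consumer:
    # record.value has already been deserialized to a Python object
    # by the value_deserializer lambda above
    print(record.value)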
Example No. 7
 def __init__(self, topic, addr, auto_commit=False, auto_offset_reset="earliest"):
     """Initializes with Topic Name, Broker Address, and Consumer Settings"""
     self.consumer = KafkaConsumer(topic,
                                   bootstrap_servers=addr,
                                   value_deserializer=lambda m: json.loads(m.decode('ascii')),
                                   enable_auto_commit=auto_commit,
                                   auto_offset_reset=auto_offset_reset,
                                   api_version=(0, 1, 0))
Example No. 8
    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(
                    CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(
            list(self.active_partitions.values()))
        # Keep only partitions that are non-empty and still have messages to consume.
        self.active_partitions = {
            p: tp
            for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks
            and self.watermarks[tp.partition].highmark > 0 and self.watermarks[
                tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(list(self.active_partitions.values()))
        self.log.info("Consuming from %s", self.active_partitions)

        message_iterator = iter(self.consumer)

        while not self.finished():
            try:
                message = next(message_iterator)
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[
                    message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        self._remove_unsubscribed_topics()

        return {
            group: topics.keys()
            for group, topics in six.iteritems(self._kafka_groups) if topics
        }
Example No. 9
def thread_main(topic):
    consumer = KafkaConsumer(topic,
                             group_id='kafka_monitor',
                             metadata_broker_list=broker_list)

    offset = consumer._offsets.fetch

    for part in offset:
        kafka_logsize.labels(topic=part[0],
                             partition=part[1]).set(offset[part])
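
The snippet above reaches into the private `_offsets` attribute of the legacy kafka-python consumer. A rough modern equivalent using the public API, assuming a current kafka-python client and the same module-level `broker_list` and prometheus-style `kafka_logsize` gauge:

from kafka import TopicPartition

def thread_main_end_offsets(topic):
    consumer = KafkaConsumer(group_id='kafka_monitor',
                             bootstrap_servers=broker_list)
    partitions = consumer.partitions_for_topic(topic) or set()
    tps = [TopicPartition(topic, p) for p in partitions]
    # end_offsets returns the next offset to be written for each partition,
    # which serves as the log-size metric here
    for tp, offset in consumer.end_offsets(tps).items():
        kafka_logsize.labels(topic=tp.topic, partition=tp.partition).set(offset)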
Example No. 10
def kafka_input(collector, **options):
    group_id = options.pop("group_id", "hackathon")
    broker = options.pop("broker", os.getenv("KAFKA_BROKER", "").split(","))

    consumer = KafkaConsumer(collector, metadata_broker_list=broker,
                             group_id=group_id, auto_commit_enable=False)
    return {
        "collector": collector,
        "files": [KafkaInputBview(consumer, collector), kafka_iter(consumer)],
        "format": kafka_format
    }
Example No. 11
    def _create_kafka_consumer(self):
        consumer = KafkaConsumer(
            self._kafka_topic,
            bootstrap_servers=self._kafka_brokers,
            auto_offset_reset=self._kafka_start_offset,
            # 'largest': how to behave when ZooKeeper has no initial offset for
            # the group, or when the requested offset is out of range
            enable_auto_commit=False,
            # when True, the consumer syncs its offset to ZooKeeper after consuming,
            # so a replacement consumer can resume from the latest offset if this one fails
            client_id=str(uuid.uuid1()) if id is None else id,
            group_id=self._kafka_group)  # discard old ones

        return consumer
Example No. 12
 def initialize(self, stormconf, context):
     #self.words = itertools.cycle(['dog', 'cat',
     #                              'zebra', 'elephant'])
     #self.sentences = [
     #    "She advised him to take a long holiday, so he immediately quit work and took a trip around the world",
     #    "I was very glad to get a present from her",
     #    "He will be here in half an hour",
     #    "She saw him eating a sandwich",
     #]
     #self.sentences = itertools.cycle(self.sentences)
     self.consumer = KafkaConsumer(b'twitterstream',
                                   bootstrap_servers=['0.0.0.0:9092'])
Example No. 13
    def run(self):
        consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
                                 auto_offset_reset='earliest',
                                 group_id=self.group,
                                 consumer_timeout_ms=1000)
        consumer.subscribe(self.topics)
        while not self.stop_event.is_set():
            for message in consumer:
                print(message)
                if self.stop_event.is_set():
                    break

        consumer.close()
Example No. 14
def CheckTopicExsited(topic):

    consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                             enable_auto_commit=False,
                             group_id='consumer')

    # Get all partitions by topic
    par = consumer.partitions_for_topic(topic)
    print(par)

    if par is None:
        return False
    return True
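
A small usage sketch for the check above; the topic name is hypothetical and `tmpbootstrap_servers` is assumed to be defined at module level, as in the original snippet:

if CheckTopicExsited('example-topic'):
    print('topic exists')
else:
    print('topic not found')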
Example No. 15
 def configure_input_queue(self):
     """
     configures the input queue that other services can use to schedule an event to be delivered
     """
     client = KafkaClient(hosts=self.kafka_hosts)
     client.ensure_topic_exists(self.input_topic)
     indexed_consumer = IndexedConsumer(self.input_topic, self.kafka_hosts)
     queue_consumer = KafkaConsumer(self.input_topic,
                                    bootstrap_servers=self.kafka_hosts,
                                    group_id=CONSUMER_GROUP)
     queue_producer = SimpleProducer(KafkaClient(hosts=self.kafka_hosts))
     self.queues.append(
         InputQueue(queue_consumer, indexed_consumer, queue_producer,
                    self.number_of_queues))
Example No. 16
def consume(args):
    schema = args.schema
    table = args.table
    assert schema in settings.SCHEMAS, 'schema must in settings.SCHEMAS'
    assert table in settings.TABLES, 'table must in settings.TABLES'
    group_id = f'{schema}.{table}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    topic = settings.KAFKA_TOPIC
    partition = settings.PARTITIONS.get(group_id)
    consumer.assign([TopicPartition(topic, partition)])
    event_list = []
    logger.info(
        f'started consuming topic:{topic}, partition:{partition}, schema:{schema}, table:{table}'
    )
    pk = reader.get_primary_key(schema, table)
    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_list.append(event)
        len_event = len(event_list)
        if len_event == settings.INSERT_NUMS or (
            (int(time.time() * 10**6) - event_list[0]['event_unixtime']) /
                10**6 >= settings.INSERT_INTERVAL > 0):
            data_dict = {}
            tmp_data = []
            for items in event_list:
                action = items['action']
                action_core = items['action_core']
                data_dict.setdefault(table + schema + action + action_core,
                                     []).append(items)
            for k, v in data_dict.items():
                tmp_data.append(v)
            result = writer.insert_event(tmp_data, settings.SKIP_TYPE,
                                         settings.SKIP_DELETE_TB_NAME, schema,
                                         table, pk)
            if result:
                event_list = []
                consumer.commit()
                logger.info(f'commit success {len_event} events!')
            else:
                logger.error('insert event error!')
                exit()
Example No. 17
 def receive_message(self, cgroup_name):
     consumer = KafkaConsumer(TOPIC,
                              group_id=cgroup_name,
                              bootstrap_servers=[BOOTSTRAP_IP])
     try:
         for msg in consumer:
             msg = msg.value
             logger.info("consumer receive message %s" % msg)
             future = self.thread_pool.submit(self.msg_handler, (msg))
             future.add_done_callback(self.callback_handler)
     except Exception:
         logger.error("consumer error")
         logger.error(traceback.format_exc())
     finally:
         self.thread_pool.shutdown(wait=True)
Example No. 18
 def __init__(self, kafkaHost, kafkaPort, tcpHost, tcpPort, group_id, topic,
              logTopic, interval):
     self.kafkaHost = kafkaHost
     self.kafkaPort = kafkaPort
     self.tcpHost = tcpHost
     self.tcpPort = tcpPort
     self.group_id = group_id
     self.topic = topic
     self.logTopic = logTopic
     self.interval = int(interval)
     self.consumer = KafkaConsumer(
         topic,
         bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)],
         group_id=group_id,
         enable_auto_commit=False)
     self.producer = KafkaProducer(
         bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)])
     self.tcpWriter = None
Example No. 19
 def __init__(self, kafka_host, kafka_port, tcp_host, tcp_port, topic,
              log_topic):
     self.kafka_host = kafka_host
     self.kafka_port = kafka_port
     self.tcp_host = tcp_host
     self.tcp_port = tcp_port
     self.topic = topic
     self.log_topic = log_topic
     self.consumer = KafkaConsumer(
         topic,
         bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)],
         enable_auto_commit=False,
         max_poll_records=1024 * 1024,
         max_partition_fetch_bytes=1024 * 1024 * 100)
     self.producer = KafkaProducer(
         bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)])
     self.connections = {}
     self.sample_end_time = self.get_end_time(time())
     self.lastPolled = []
Example No. 20
    def run(self):
        avro_serde = AvroSerDe(AVRO_SCHEMA_STRING)
        client = KafkaClient('localhost:9092')
        consumer = KafkaConsumer(KAFKA_TOPIC,
                                 group_id='my_group',
                                 bootstrap_servers=['localhost:9092'])

        # Keep track of and print statistics.
        attempts = 0
        failures = 0
        failure_rate = 0.0
        for message in consumer:
            event = avro_serde.bytes_to_obj(message.value)
            print('--> ' + str(event))
            if event['op'] == 'login':
                attempts += 1
                if not event['success']: failures += 1
                failure_rate = float(failures) / attempts
            print('--> Event: ' + str(event))
            print('--> Failure Rate: ' + str(failure_rate))
Example No. 21
def commitTopic(topic, group, partition, commit_offset):
    try:
        print(
            '===================================================================================='
        )
        print('[commitTopic] : topic=' + topic + ', group=' + group +
              ', partition=' + str(partition) + ', commit_offset=' +
              str(commit_offset))
        consumer2 = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                  enable_auto_commit=False,
                                  group_id=group)
        tp = TopicPartition(topic, partition)

        if int(commit_offset) > 0:

            consumer2.commit({tp: OffsetAndMetadata(commit_offset, None)})

    except Exception as ee:
        print('error when commit Topic')
        print(str(ee))
    finally:
        print('commitTopic end')
Example No. 22
    import argparse

    relations, childs, parents = caida_filter_annaunce(
        "20160101.as-rel.txt", "20160101.ppdc-ases.txt")

    print(len(relations), len(childs), len(parents))
    parser = argparse.ArgumentParser(
        description="get a feed of abnormal BGP conflicts")
    parser.add_argument("--offset", type=int)

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    consumer = KafkaConsumer("hijacks",
                             bootstrap_servers=["comet-17-08.sdsc.edu:9092"],
                             group_id="client")
    if args.offset is not None:
        topics = [("hijacks", i, args.offset) for i in PARTITIONS.values()]
        consumer.set_topic_partitions(*topics)

    hijacks = 0
    total = 0
    for item in consumer:
        total += 1
        if (is_legittimate(relations, childs, parents,
                           json.loads(item.value)) == 0):
            hijacks += 1
            #print(item.value)

        if (total == 10000): print(total, hijacks)
Example No. 23
from kafka.client import KafkaClient
from kafka.consumer import KafkaConsumer
from kafka.producer import SimpleProducer

import numpy as np
from sklearn import svm
from sklearn.externals import joblib

import mysql.connector
from datetime import datetime

import json


client = KafkaClient("ip-172-31-28-55.ec2.internal:6667")
consumer = KafkaConsumer("shm", metadata_broker_list=['ip-172-31-28-55.ec2.internal:6667'])
#consumer = KafkaConsumer("shm", metadata_broker_list=['ip-172-31-28-55.ec2.internal:6667'])

conn = mysql.connector.connect(user='******', password='******',
                              host='iotshm-data.ck3sx5qm0blx.us-west-2.rds.amazonaws.com',
                              database='iotshm')

cursor = conn.cursor()

#add_health = ("""INSERT IGNORE INTO iotshm.Health (sensor_id, timestamp, reading_type, healthy) VALUES (%s, %s, %s, %s)""")
add_magnitude = ("""INSERT IGNORE INTO iotshm.Magnitude (frequency, sensor_id, magnitude, reading_type, timestamp, healthy) VALUES(%s, %s, %s, %s, %s, %s)""")


# TODO add new classifier files and change file names
x_clf = joblib.load('xClf.pkl')
y_clf = joblib.load('xClf.pkl')
Example No. 24
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('Basics').getOrCreate()

# import pyspark class Row from module sql
from pyspark.sql import *
from pyspark.sql.types import *
import tempfile

# ml
from pyspark.ml import Pipeline
from pyspark.ml import PipelineModel

# start a kafka consumer session
from kafka.consumer import KafkaConsumer
consumer = KafkaConsumer(
    "titanic",
    bootstrap_servers=['ip-172-31-12-218.us-east-2.compute.internal:6667'])
print('consumer launched')

testSchema = [
    "PassengerId", "Pclass", "Name", "Sex", "Age", "SibSp", "Parch", "Ticket",
    "Fare", "Cabin", "Embarked"
]

pipeline = Pipeline.load("/home/ubuntu/titanic/pipeline")
model = PipelineModel.load("/home/ubuntu/titanic/model")


def getTrain(msg):
    # put passenger info into dataframe
    # print msg
Example No. 25
    broker = SimpleClient(kafka)
    lags = {}
    zk = KazooClient(hosts=zookeepers, read_only=True)
    zk.start()
    logsize = 0
    #    topics=zk.get_children("/consumers/%s/owners" % (group) )
    topic = sys.argv[1]
    data_need = sys.argv[2]
    #    for topic in topics:
    if topic:
        logsize = 0
        #	print topic
        partitions = broker.get_partition_ids_for_topic(topic)
        #	print partitions
        consumer = KafkaConsumer(broker, group, str(topic))
        responses = broker.send_offset_fetch_request(
            group, [OffsetFetchRequestPayload(topic, p) for p in partitions],
            fail_on_error=True)
        #	print responses
        latest_offset = 0
        for res in responses:
            if topic != "test":
                latest_offset += res[2]
            #	print latest_offset
        for partition in partitions:
            log = "/consumers/%s/offsets/%s/%s" % (group, topic, partition)
            if zk.exists(log):
                data, stat = zk.get(log)
                logsize += int(data)
            #	print logsize
Example No. 26
def main():
    logger = logging.getLogger(os.path.basename(__file__))

    # Setup Aiven SDK
    logger.info("Setting up Aiven SDK")
    client = AivenClient("https://api.aiven.io")
    client.set_auth_token(os.environ["AIVEN_TOKEN"])

    # Lookup the target service
    logger.info("Looking up the target Aiven Kafka Service")
    service = client.get_service(project=os.environ["AIVEN_PROJECT"],
                                 service=os.environ["AIVEN_SERVICE"])
    if not service:
        raise SystemExit("Failed to look up the target service")

    # Store credentials on disk. This is using the main access certificates (avnadmin).
    logger.info("Storing Aiven service access credentials")
    with open("client.crt", "w") as fh:
        fh.write(service["connection_info"]["kafka_access_cert"])
    with open("client.key", "w") as fh:
        fh.write(service["connection_info"]["kafka_access_key"])

    # Project CA certificate
    logger.info("Fetching project CA certificate")
    result = client.get_project_ca(project=os.environ["AIVEN_PROJECT"])
    with open("ca.crt", "w") as fh:
        fh.write(result["certificate"])

    # Initialize Kafka client
    kafka_client = KafkaConsumer(
        bootstrap_servers=service["service_uri"],
        security_protocol="SSL",
        ssl_cafile="ca.crt",
        ssl_certfile="client.crt",
        ssl_keyfile="client.key",
    )

    partitions = kafka_client.partitions_for_topic(os.environ["AIVEN_TOPIC"])
    tps = [
        TopicPartition(os.environ["AIVEN_TOPIC"], partition)
        for partition in partitions
    ]
    last_timestamp = time.monotonic()
    last_offsets = {}

    logger.info("Start result collection loop, break with CTRL-C")
    readings = []
    while True:
        delta = 0
        result = kafka_client.end_offsets(tps)
        timenow = time.monotonic()
        for tp, offset in result.items():
            if tp in last_offsets:
                delta += offset - last_offsets[tp]
            last_offsets[tp] = offset

        messages_per_second = int(delta / (timenow - last_timestamp))

        readings.append(messages_per_second)
        readings = readings[-30:]

        logger.info("%d messages/s, 30 sample average %d messages/s",
                    messages_per_second,
                    sum(readings) / len(readings))
        last_timestamp = timenow
        time.sleep(2)
Example No. 27
from kafka.consumer import KafkaConsumer
from json import loads
from mongoengine import *

from matilda.data_pipeline import object_model

consumer = KafkaConsumer(
    'numtest',  # kafka topic
    bootstrap_servers=['localhost:9092'],  # same as our producer
    # auto_offset_reset controls where the consumer resumes reading when it has no
    # valid committed offset (e.g. a brand-new group, or an out-of-range offset):
    # 'latest' starts at the end of the log, 'earliest' starts at the beginning.
    # With a valid committed offset the consumer always resumes from that offset.
    auto_offset_reset='earliest',
    enable_auto_commit=True,  # commit the read offset automatically at a regular interval
    # join a consumer group for dynamic partition assignment and offset commits;
    # a consumer needs to be part of a consumer group for auto commit to work.
    # Otherwise, assign partitions manually, e.g.
    # consumer.assign([TopicPartition('foobar', 2)]); msg = next(consumer)
    # (see the sketch after this call)
    group_id='my-group',
    # deserialize encoded values
    value_deserializer=lambda x: loads(x.decode('utf-8')))
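
As the comment above notes, a consumer that is not part of a group can be assigned its partitions manually. A minimal sketch of that alternative, assuming the same broker and topic as above (the partition number is hypothetical):

from kafka import TopicPartition

manual_consumer = KafkaConsumer(
    bootstrap_servers=['localhost:9092'],
    value_deserializer=lambda x: loads(x.decode('utf-8')))
# no group_id: assign the partition explicitly instead of subscribing
manual_consumer.assign([TopicPartition('numtest', 0)])  # hypothetical partition 0
first_record = next(manual_consumer)  # blocks until a record arrives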


def get_atlas_db_url(username, password, dbname):
    return f"mongodb+srv://{username}:{password}@cluster0.ptrie.mongodb.net/{dbname}?retryWrites=true&w=majority&" \
           f"ssl=true"


atlas_url = get_atlas_db_url(username='******',
                             password='******',
                             dbname='matilda-db')
Example No. 28
def consume(args):
    schema = args.schema
    tables = args.tables
    skip_error = args.skip_error
    assert schema in settings.SCHEMAS, f'schema {schema} must in settings.SCHEMAS'
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    partitions = []
    for table in tables.split(','):
        assert table in settings.TABLES, f'table {table} must in settings.TABLES'

        partition = settings.PARTITIONS.get(f'{schema}.{table}')
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)

    group_id = f'{schema}.{tables}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    consumer.assign(partitions)

    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(f'started consuming topic:{topic}, partitions:{partitions}, schema:{schema}, tables:{tables}')

    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        event_list.setdefault(table, []).append(event)
        len_event += 1

        if last_time == 0:
            last_time = event_unixtime

        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table, tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
Example No. 29
from kafka.consumer import KafkaConsumer
import config_db
import ast

kafka_boostrap_servers = '127.0.0.1:9092'
kafka_topic_name = 'temperaturas'

consumer = KafkaConsumer(kafka_topic_name,
                         bootstrap_servers=kafka_boostrap_servers,
                         auto_offset_reset='earliest',
                         enable_auto_commit=False)  # earliest
for message in consumer:
    item = message.value.decode('utf-8')
    config_db.inserir_database(ast.literal_eval(item))
    print(message.value.decode('utf-8'))
Example No. 30
def getMsgData(topic, group, result, maxsize):
    try:
        saveResult = SaveDataResult()
        saveResult.guid = str(uuid.uuid4())
        saveResult.CreateDate = datetime.datetime.now().strftime(
            "%Y-%m-%d %H:%M:%S")

        msgInfos = []
        result.guid = saveResult.guid
        result.topic_messages = []

        consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                 enable_auto_commit=False,
                                 group_id=group)

        # Get all partitions by topic
        par = consumer.partitions_for_topic(topic)

        now_count = 0

        for p in par:
            tp = TopicPartition(topic, p)
            consumer.assign([tp])
            print(tp)
            info = MsgPartitionInfo()

            # Get committed offset
            print('start to get committed offset.....')
            try:
                committed = consumer.committed(tp) or 0
            except Exception as e_commit:
                print(str(e_commit))

            # Move consumer to end to get the last position
            consumer.seek_to_end(tp)
            last_offset = consumer.position(tp)

            # Move consumer to beginning to get the first position
            consumer.seek_to_beginning()
            now_offset = consumer.position(tp)
            from_offset = committed

            if from_offset is None:
                from_offset = now_offset

            if from_offset < now_offset:
                from_offset = now_offset

            info.partition_ID = tp.partition
            info.get_last_offset = last_offset
            msgInfos.append(info)

            print("[%s] partition(%s) -> now:%s,  last:%s,  committed:%s" %
                  (tp.topic, tp.partition, now_offset, last_offset, committed))

            # Get msg from position to offset
            while (from_offset < last_offset) and (now_count < maxsize):
                consumer.seek(tp, from_offset)
                polldata = consumer.poll(100)
                from_offset += 1
                now_count += 1
                print('now_count=' + str(now_count))
                result.topic_messages.append(polldata[tp][0].value)

        saveResult.MsgInfo = json.dumps(msgInfos,
                                        default=encode_MsgPartitionInfo,
                                        ensure_ascii=False)
        print(saveResult.MsgInfo)
        consumer.close()
        saveResult.message = "Success"
        saveResult.Code = 200

        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        producer.send(topic + "_log",
                      json.dumps(saveResult, default=encode_SaveDataResult))
        producer.flush()