def open_consumer(stream_host_and_port_list, topic_name, group_name):
    consumer = Consumer({'bootstrap.servers': stream_host_and_port_list,  # kafka broker
                         'group.id': group_name,                          # consumer group
                         'api.version.request': 'true'})
    consumer.subscribe([topic_name])
    return consumer
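# A minimal usage sketch (not from the original source) showing how
# open_consumer() above might be driven; the broker address, topic and group
# names are placeholder assumptions.
def example_poll_loop():
    consumer = open_consumer('localhost:9092', 'example_topic', 'example_group')
    try:
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            if msg.error():
                print(msg.error())
                continue
            print(msg.value().decode('utf-8'))
    finally:
        consumer.close()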
def analytics_internet3_logs():
    consumer = Consumer({'bootstrap.servers': kafka_hosts,
                         'group.id': 'Internet3_logs_%s' % dt,
                         'default.topic.config': {'auto.offset.reset': 'latest',
                                                  'auto.commit.enable': 'true'}})
    consumer.subscribe(['haproxy_logs'])
    try:
        while True:
            msg = consumer.poll()
            if not msg.error():
                Msg = msg.value().decode('utf-8').strip()
                try:
                    tm = time.strftime('%Y%m%d%H%M', time.localtime())
                    if Msg:
                        Msg = Msg.split()
                        if len(Msg) >= 17:
                            internet_access_minute = 'internet_access_minute_%s' % tm
                            RC.incr(internet_access_minute)
                            RC.expire(internet_access_minute, 3600)
                except Exception as e:
                    logging.error(e)
                    continue
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                logging.error(msg.error())
                continue
    except Exception as e:
        logging.error(e)
    finally:
        consumer.close()
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should throw a RuntimeError """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert 'Consumer closed' == str(ex.value)
def test_basic_api():
    """ Basic API tests, these won't really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = list(map(lambda p: TopicPartition("test", p), range(0, 100, 3)))
    kc.assign(partitions)
    kc.unassign()

    # the commit flag was renamed to `asynchronous` ('async' is reserved in Python 3)
    kc.commit(asynchronous=True)
    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
def consume():
    c = Consumer({'bootstrap.servers': KAFKA_SERVER,
                  'group.id': 'mygroup',
                  'default.topic.config': {'auto.offset.reset': 'smallest'}})
    c.subscribe([KAFKA_TOPIC])
    while True:
        msg = c.poll()
        if not msg.error():
            print('Received message: %s' % msg.value().decode('utf-8'))
    c.close()
def subscribe():
    c = Consumer({'bootstrap.servers': '0',
                  'group.id': 'test-consumer-group',
                  'default.topic.config': {'auto.offset.reset': 'smallest'}})
    c.subscribe(['neuronraindata'])
    while True:
        msg = c.poll()
        if not msg.error() and msg.value():
            print('Received message: ', msg.value().encode("utf-8"))
        else:
            print(msg.error())
    c.close()
class KafkaWorkflowResultsReceiver(object): _requires = ['confluent-kafka'] def __init__(self, message_converter=ProtobufWorkflowResultsConverter, current_app=None): import walkoff.server.workflowresults # Need this import self.thread_exit = False kafka_config = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_CONFIG self.receiver = Consumer(kafka_config) self.topic = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_TOPIC self.message_converter = message_converter self.workflows_executed = 0 if current_app is None: self.current_app = Flask(__name__) self.current_app.config.from_object(walkoff.config.Config) self.current_app.running_context = context.Context(init_all=False) else: self.current_app = current_app def receive_results(self): """Constantly receives data from the Kafka Consumer and handles it accordingly""" logger.info('Starting Kafka workflow results receiver') self.receiver.subscribe(['{}.*'.format(self.topic)]) while not self.thread_exit: raw_message = self.receiver.poll(1.0) if raw_message is None: gevent.sleep(0.1) continue if raw_message.error(): if raw_message.error().code() == KafkaError._PARTITION_EOF: gevent.sleep(0.1) continue else: logger.error('Received an error in Kafka receiver: {}'.format(raw_message.error())) gevent.sleep(0.1) continue with self.current_app.app_context(): self._send_callback(raw_message.value()) self.receiver.close() return def _send_callback(self, message_bytes): event, sender, data = self.message_converter.to_event_callback(message_bytes) if sender is not None and event is not None: with self.current_app.app_context(): event.send(sender, data=data) if event in [WalkoffEvent.WorkflowShutdown, WalkoffEvent.WorkflowAborted]: self._increment_execution_count() def _increment_execution_count(self): self.workflows_executed += 1
def analytics_intranet_logs(): consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Intranet_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}}) consumer.subscribe(['haproxy2_logs']) try: while True: msg = consumer.poll() if not msg.error(): Msg = msg.value().decode('utf-8').strip() try: tt = time.strftime('%Y%m%d', time.localtime()) th = time.strftime('%Y%m%d%H', time.localtime()) tm = time.strftime('%Y%m%d%H%M', time.localtime()) H_key = 'haproxy2_topic_%s' % tt top2_url_hour = 'top2_url_hour_%s' % th top2_url_minute = 'top2_url_minute_%s' % tm if len(Msg.split()) >= 17: val = Msg.split('{') if len(val) >= 2: Topic = val[1].split('}')[0] Rtime = val[0].split()[8] Rtime = int(Rtime.split('/')[4]) if ':' in Topic: Topic = str(Topic.split(':')[0]) if '|' in Topic: Topic = str(Topic.split('|')[0]) if '.baihe.com' in Topic: Key = 'haproxy2_logs_%s_%s' % (tt, Topic) Rt_Key = 'Rtime2_%s_%s' % (tt, Topic) # 接口 PATH = str(Msg.split()[17]).split('?')[0] URL = 'http://%s%s' % (Topic,PATH) RC.zincrby(top2_url_hour, URL, 1) RC.zincrby(top2_url_minute, URL, 1) for KEY in (H_key, Key, Rt_Key,top2_url_hour,top2_url_minute): RC.expire(KEY,3600) RC.sadd(H_key, Topic) RC.incr(Key) if Rtime: RC.lpush(Rt_Key, Rtime) except Exception as e: logging.error(e) continue elif msg.error().code() != KafkaError._PARTITION_EOF: logging.error(msg.error()) continue except Exception as e: logging.error(e) finally: consumer.close()
def analytics_internet_logs(): consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Internet_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}}) consumer.subscribe(['haproxy_logs']) try: while True: msg = consumer.poll() if not msg.error(): Msg = msg.value().decode('utf-8').strip() try: tt = time.strftime('%Y%m%d', time.localtime()) th = time.strftime('%Y%m%d%H', time.localtime()) pv_key = 'baihe_pv_%s' % tt if Msg: Msg = Msg.split() RC.incr(pv_key) if len(Msg) >= 17: Topic = str(Msg[14]).split('|')[0].replace('{', '').strip() IP = str(Msg[5]) H_key = 'haproxy_topic_%s' % tt top_ip = 'top_ip_%s' % tt top_ip_hour = 'top_ip_%s' % th top_url_hour = 'top_url_%s' % th PATH = str(Msg[16]).split('?')[0] URL = 'http://%s%s' % (Topic,PATH) Ha_Key = 'haproxy_logs_%s_%s' % (tt, Topic) top_ip_domain = 'top_%s_domain_%s' % (IP, tt) top_ip_domain_hour = 'top_%s_domain_%s' % (IP, th) for KEY in (H_key, pv_key, top_ip, top_url_hour, top_ip_hour,Ha_Key, top_ip_domain, top_ip_domain_hour): RC.expire(KEY,3600) RC.sadd(H_key, Topic) RC.incr(Ha_Key) # ip RC.zincrby(top_ip, IP, 1) RC.zincrby(top_ip_hour, IP, 1) # IP_接口 RC.zincrby(top_ip_domain, URL, 1) RC.zincrby(top_ip_domain_hour, URL, 1) # 接口 RC.zincrby(top_url_hour, URL, 1) except: continue elif msg.error().code() != KafkaError._PARTITION_EOF: logging.error(msg.error()) continue except Exception as e: logging.error(e) finally: consumer.close()
def test_multiple_close_throw_exception():
    """ Calling Consumer.close() multiple times should throw Runtime Exception """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.close()
    assert 'Consumer already closed' == str(ex.value)
def WAF_logs(): consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Waf_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}}) consumer.subscribe(['haproxy_logs']) try: while True: msg = consumer.poll() if not msg.error(): Msg = msg.value().decode('utf-8').strip() try: tm = time.strftime('%Y%m%d%H%M',time.localtime()) if Msg: Msg = Msg.split() if len(Msg) >= 17: url_code = Msg[9] Topic =str(Msg[14]).split('|')[0].replace('{','').strip() IP = str(Msg[5]) if url_code in ('200', '206', '301', '302', '304', '404'): top_ip_minute = 'top_ip_%s' % tm top_url_minute = 'top_url_%s' % tm PATH = str(Msg[16]).split('?')[0] URL = 'http://%s%s' % (Topic,PATH) top_ip_domain_minute = 'top_%s_domain_%s' % (IP, tm) top_url_ip_minute = 'top_%s_ip_%s' % (URL, tm) # ip RC.zincrby(top_ip_minute, IP, 1) RC.expire(top_ip_minute, 300) # IP_接口 RC.zincrby(top_ip_domain_minute, URL, 1) RC.expire(top_ip_domain_minute, 300) # 接口 RC.zincrby(top_url_minute, URL, 1) RC.expire(top_url_minute, 300) # 接口_ip RC.zincrby(top_url_ip_minute, IP, 1) RC.expire(top_url_ip_minute, 300) except Exception as e: logging.error(e) continue elif msg.error().code() != KafkaError._PARTITION_EOF: logging.error(msg.error()) continue except Exception as e: logging.error(e) finally: consumer.close()
def test_store_offsets():
    """ Basic store_offsets() tests """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    try:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._UNKNOWN_PARTITION

    c.unsubscribe()
    c.close()
def test_calling_store_offsets_after_close_throws_error():
    """ Calling store_offsets() after close should throw a RuntimeError """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.offsets_for_times([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)
async def consume_events(topic, group, brokers, callback, schema=None,
                         registry=None, delay=0.01, **kwargs):
    """
    Connect to the Kafka endpoint and start consuming
    messages from the given `topic`.
    The given callback is applied on each message.
    """
    global consumer
    if topic in consumers:
        raise RuntimeError("A consumer already exists for topic: %s" % topic)

    if (not registry_serializer or not registry_client) and registry:
        r_client, serializer = create_registry_client(registry)

    consumer = Consumer({'bootstrap.servers': brokers,
                         'group.id': group,
                         'default.topic.config': {'auto.offset.reset': 'largest'}})
    consumer.subscribe([topic])
    consumers[topic] = consumer

    try:
        while True:
            message = consumer.poll(1)
            if message:
                if not message.error():
                    if registry:
                        message = serializer.decode_message(message.value())
                    else:
                        message = message.value()
                    await callback(message)
                    consumer.commit()
            else:
                await asyncio.sleep(delay)
    except KafkaException as ex:
        pass
    else:
        consumer.close()
    finally:
        consumers.pop(topic, None)
if __name__ == '__main__':
    from confluent_kafka import Consumer, KafkaError

    # 'enable.partition.eof': False
    # https://github.com/confluentinc/confluent-kafka-python/issues/283
    # https://github.com/confluentinc/confluent-kafka-python/issues/176
    # https://github.com/edenhill/librdkafka/issues/1024
    c = Consumer({'bootstrap.servers': '<kafka server>',
                  'group.id': 'mygroup',
                  'enable.partition.eof': False,
                  'default.topic.config': {'auto.offset.reset': 'smallest'}})
    c.subscribe(['<topic>'])

    running = True
    while running:
        msg = c.poll()
        if not msg.error():
            print('Received message: %s' % msg.value().decode('utf-8'))
        elif msg.error().code() != KafkaError._PARTITION_EOF:
            print(msg.error())
            running = False
    c.close()
def consumer(args, poll_timeout=3.0):
    """ Consumes packets from a Kafka topic. """

    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    # where to start consuming messages from
    kafka_offset_options = {
        "begin": seek_to_begin,
        "end": seek_to_end,
        "stored": seek_to_stored
    }
    on_assign_cb = kafka_offset_options[args.kafka_offset]

    # connect to kafka
    logging.debug("Connecting to Kafka; %s", args.kafka_configs)
    kafka_consumer = Consumer(args.kafka_configs)
    kafka_consumer.subscribe([args.kafka_topic], on_assign=on_assign_cb)

    # if 'pretty-print' not set, write libpcap global header
    if args.pretty_print == 0:
        sys.stdout.write(global_header(args))
        sys.stdout.flush()

    try:
        pkts_in = 0
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):

            # consume a message from kafka
            msg = kafka_consumer.poll(timeout=poll_timeout)
            if msg is None:
                # no message received
                continue

            elif msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    if args.pretty_print > 0:
                        print "Reached end of partition: topic=%s, partition=%d, offset=%s" % (
                            msg.topic(), msg.partition(), msg.offset())
                else:
                    raise KafkaException(msg.error())

            else:
                pkts_in += 1
                logging.debug("Packet received: pkts_in=%d", pkts_in)

                if args.pretty_print == 0:
                    # write the packet header and packet
                    sys.stdout.write(packet_header(msg))
                    sys.stdout.write(msg.value())
                    sys.stdout.flush()

                elif pkts_in % args.pretty_print == 0:
                    # pretty print
                    print 'Packet[%s]: date=%s topic=%s partition=%s offset=%s len=%s' % (
                        pkts_in, to_date(unpack_ts(msg.key())), args.kafka_topic,
                        msg.partition(), msg.offset(), len(msg.value()))

    finally:
        sys.stdout.close()
        kafka_consumer.close()
class StreamAbsGen(object): def __init__(self,data_storage,data_source): #For Apache Cassandra, HBase and Hive, code from HivePythonClient.py for HiveServer2, #HBasePythonClient.py and CassandraPythonClient.py has been #replicated in __iter__(). #Possible storages: #self.data_storage="file" #self.data_storage="hive" #self.data_storage="hbase" #self.data_storage="cassandra" #self.data_storage="USBWWAN_stream" #self.data_storage="KingCobra" #self.data_storage="Spark_Parquet" #self.data_storage="AsFer_Encoded_Strings" self.data_storage=data_storage #Possible datasources: #self.data_source="RZF" #self.data_source="movielens" #self.data_source="USBWWAN" #self.data_source="file" #self.data_source="KingCobra" #self.data_source="Spark_Streaming" #self.data_source="NeuronRain" self.data_source=data_source if self.data_storage=="KingCobra": self.inputfile=open("/var/log/kingcobra/REQUEST_REPLY.queue") if self.data_storage=="AsFer_Encoded_Strings": self.inputfile=open("../cpp-src/asfer.enterprise.encstr") if self.data_storage=="file": self.inputfile=open(data_source,"r") if self.data_storage=="USBWWAN_stream": self.inputfile=open("../../usb-md-github-code/usb_wwan_modified/testlogs/kern.log.print_buffer_byte") if self.data_storage=="hbase": self.hbase_connection = happybase.Connection(host='localhost',port=9090,transport='buffered') self.hbase_table = self.hbase_connection.table('stream_data') print "StreamAbsGen:__init__():connected to HBase table" if self.data_storage=="hive": #pyhs2 client - requires SASL self.hive_conn=pyhs2.connect(host='localhost', port=10000, authMechanism="PLAIN", user='******', password='******', database='default') self.hive_cur=self.hive_conn.cursor() #Show databases print self.hive_cur.getDatabases() #Execute query self.hive_cur.execute("CREATE TABLE stream_data (alphanum STRING)") self.hive_cur.execute("select * from stream_data") #Return column info from query print self.hive_cur.getSchema() print "StreamAbsGen:__init__():connected to Hive table" if self.data_storage=="cassandra": self.cl=Cluster() self.session = self.cl.connect('cassandrakeyspace') inputf=open('movielens_stream2.data') for line in inputf: linetoks=line.split(' ') query='INSERT INTO stream_data(row_id,alphanum) VALUES (\''+linetoks[0]+'\',\''+linetoks[1]+'\');' print query session.execute(query) self.query='SELECT * FROM stream_data' self.resultrows=self.session.execute(self.query) print "StreamAbsGen:__init__(): connected to Cassandra" if self.data_storage=="Kafka": self.c = Consumer({'bootstrap.servers': '0', 'group.id': 'test-consumer-group', 'default.topic.config': {'auto.offset.reset': 'smallest'}}) self.c.subscribe(['neuronraindata']) if self.data_storage=="Socket_Streaming": self.streaming_host=self.data_source self.streaming_port=64001 if self.data_storage=="OperatingSystem": self.streaming_host="localhost" if self.data_storage=="TextHistogramPartition": self.partition_stream=[] for ds in data_source: self.partition_stream.append(open(ds,"r")) if self.data_storage=="DictionaryHistogramPartition": self.partition_stream=open(data_source,"r") def __iter__(self): if self.data_storage=="Spark_Parquet": self.spark=SparkSession.builder.getOrCreate() spark_stream_parquet=self.spark.read.parquet("../java-src/bigdata_analytics/spark_streaming/word.parquet") #spark_stream_parquet_DS=spark_stream_parquet.rdd.map(lambda row: (row.word)) spark_stream_parquet_DS=spark_stream_parquet.rdd.filter(lambda row: row.word not in [' 
','or','and','who','he','she','whom','well','is','was','were','are','there','where','when','may', 'The', 'the', 'In','in','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',' ','.', '"', ',', '{', '}', '+', '-', '*', '/', '%', '&', '(', ')', '[', ']', '=', '@', '#', ':', '|', ';','\'s','1','2','3','4','5','6','7','8','9','0']) for r in spark_stream_parquet_DS.collect(): print "StreamiAbsGen(Spark Parquet): iterator yielding %s" % r.word.encode("UTF-8") yield r.word.encode("UTF-8") if self.data_storage=="KingCobra": for i in self.inputfile: print "StreamAbsGen(file storage): iterator yielding %s" % i yield i if self.data_storage=="hbase": for key,value in self.hbase_table.scan(): print "StreamAbsGen(HBase storage): iterator yielding %s" % i yield value['cf:alphanum'] if self.data_storage=="AsFer_Encoded_Strings": for i in self.inputfile: print "StreamAbsGen(file storage): iterator yielding %s" % i yield i if self.data_storage=="file": for i in self.inputfile: words=i.split() for word in words: print "StreamAbsGen(file storage): iterator yielding %s" % word.strip() yield word.strip() if self.data_storage=="hive": #Fetch table results for i in self.hive_cur.fetch(): print "StreamAbsGen(Hive storage): iterator yielding %s" % i[0] yield i[0] if self.data_storage=="cassandra": for row in self.resultrows: #print row.row_id,' ',row.alphanum print "StreamAbsGen(Cassandra storage): iterator yielding %s" % row.alphanum yield row.alphanum if self.data_storage=="USBWWAN_stream": for i in self.inputfile: #print "StreamAbsGen(USBWWAN byte stream data): iterator yielding %s" % i yield i if self.data_storage=="Kafka": while True: print "Polling Kafka topic to receive message ..." msg = self.c.poll() if not msg.error() and msg.value(): print('Received message: ' , msg.value().encode("utf-8")) yield msg else: print(msg.error()) self.c.close() if self.data_storage=="Socket_Streaming": s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((self.streaming_host,self.streaming_port)) print "socket_streaming_client(): host = ",self.streaming_host,"; post=",self.streaming_port data="" while data != None: data=s.recv(100) yield data if self.data_storage=="OperatingSystem" and self.data_source=="SchedulerRunQueue": from DeepLearning_SchedulerAnalytics import sched_debug_runqueue while True: schedrunqueue=sched_debug_runqueue() #df=DataFrame(data=schedrunqueue) #yield df yield schedrunqueue if self.data_storage=="TextHistogramPartition": self.sc = SparkContext() for ps in self.partition_stream: partition_stream_DS=self.sc.parallelize(ps.readlines()).flatMap(lambda line: line.split(" ")).map(lambda word: (word,[1])).reduceByKey(lambda v1,v2: v1+v2).groupByKey().mapValues(list) partition=partition_stream_DS.collect() print "partition:",partition if partition[0] is not '': print "StreamAbsGen(Spark Parquet): iterator yielding labelled partition: %s" % partition yield partition if self.data_storage=="DictionaryHistogramPartition": dict_stream=ast.literal_eval(self.partition_stream.read()) for d in dict_stream: yield d
kafka_settings = {
    'bootstrap.servers': kafka_server,
    'group.id': 'kafka_scanner_clients',
    'client.id': 'kafka_scanner_client_%s' % kafka_scanner_name,
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'security.protocol': 'SSL',
    'ssl.ca.location': '/Certs/client-ca.cer',
    'ssl.certificate.location': '/Certs/client.cer',
    'ssl.key.location': '/Certs/client.key',
    'ssl.key.password': kafka_ssl_password,
    'auto.offset.reset': 'smallest'
}

try:
    consumer = Consumer(kafka_settings)
    consumer.subscribe(['ENTITY_RISK_LEVEL'])
except:
    print(Fore.RED +
          'There was an issue with your kafka consumer settings. Please ensure the '
          'SSL certs are in the correct directory and your settings are correct.')
    continue

print(Fore.GREEN + 'Your kafka settings were successful. Moving on.')
break

# Write out configuration file
print()
print(Fore.CYAN + 'Writing Kafka Scanner settings configuration...' + Fore.RESET)
with open(parentdir + '\\settings.py', 'a+') as f:
    f.write('# KAFKA SETTINGS\n')
client = MongoClient(
    "mongodb+srv://" + username + ":" + password +
    "@cluster0-kpzsd.gcp.mongodb.net/test?retryWrites=true&w=majority")
db = client["realTime"]
tweetCol = db["tweets_test"]

consumer = Consumer({
    'bootstrap.servers': 'kafka:9092',
    'group.id': 'mygroup',
    'default.topic.config': {
        'auto.offset.reset': 'latest'
    }
})

consumer.subscribe(['example_topic'])
print('subscribed')

while True:
    msg = consumer.poll(1)
    print("here")
    if msg is None:
        continue
    if msg.error():
        if msg.error().code() == KafkaError._PARTITION_EOF:
            continue
        else:
            print(msg.error())
            break
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest # TODO: Configure the broker properties below. Make sure to reference the project README # and use the Host URL for Kafka and Schema Registry! self.broker_properties = { "bootstrap.servers": ",".join(["PLAINTEXT://localhost:9092"]), "group.id": f"{topic_name_pattern}", "default.topic.config": {"auto.offset.reset": "earliest"}, } # TODO: Create the Consumer, using the appropriate type. if is_avro is True: self.broker_properties["schema.registry.url"] = "http://localhost:8081" self.consumer = AvroConsumer(self.broker_properties) else: self.consumer = Consumer(self.broker_properties) # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about # how the `on_assign` callback should be invoked. self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign) def on_assign(self, consumer, partitions): """Callback for when topic assignment takes place""" # TODO: If the topic is configured to use `offset_earliest` set the partition offset to # the beginning or earliest for partition in partitions: if self.offset_earliest is True: logger.debug( f"setting partitions to earliest for {self.topic_name_pattern}" ) logger.debug(f"before: {partition}") partition.offset = confluent_kafka.OFFSET_BEGINNING logger.debug(f"after: {partition}") logger.info(f"partitions assigned for {self.topic_name_pattern}") # TODO: Assign the consumer the partitions consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" # TODO: Poll Kafka for messages. Make sure to handle any errors or exceptions. # Additionally, make sure you return 1 when a message is processed, and 0 when no message # is retrieved. logger.debug(f"consuming from topic pattern {self.topic_name_pattern}") try: message = self.consumer.poll(timeout=self.consume_timeout) except SerializerError as e: logger.error( f"failed to deserialize message {self.topic_name_pattern}: {e}" ) return 0 if message is None: logger.debug("no messages to be consumed") return 0 elif message.error() is not None: logger.error( f"failed to consume message {self.topic_name_pattern}: {message.error()}" ) return 0 logger.debug(f"message received: ({message.key()}) {message.value()}") self.message_handler(message) return 1 def close(self): """Cleans up any open kafka consumers""" # TODO: Cleanup the kafka consumer logger.debug("closing consumer...") self.consumer.close()
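# Illustrative sketch (not part of the original project) of how the
# KafkaConsumer class above is typically driven; the topic pattern, handler
# and event-loop wiring here are assumptions.
def print_message(message):
    print(f"{message.topic()}: {message.value()}")

example_consumer = KafkaConsumer(
    topic_name_pattern="^org.example.",
    message_handler=print_message,
    is_avro=False,
    offset_earliest=True,
)
# The consume() coroutine is then scheduled on a Tornado IOLoop, e.g.
#   tornado.ioloop.IOLoop.current().spawn_callback(example_consumer.consume)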
class KafkaConsumer: def __init__(self, kafka_env = 'LOCAL', kafka_brokers = "", kafka_user = "", kafka_password = "", topic_name = "",autocommit = True): self.kafka_env = kafka_env self.kafka_brokers = kafka_brokers self.kafka_user = kafka_user self.kafka_password = kafka_password self.topic_name = topic_name self.kafka_auto_commit = autocommit # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md # Prepares de Consumer with specific options based on the case def prepareConsumer(self, groupID = "pythonconsumers"): options ={ 'bootstrap.servers': self.kafka_brokers, 'group.id': groupID, 'auto.offset.reset': 'earliest', 'enable.auto.commit': self.kafka_auto_commit, } if (self.kafka_env != 'LOCAL'): options['security.protocol'] = 'SASL_SSL' options['sasl.mechanisms'] = 'PLAIN' options['sasl.username'] = self.kafka_user options['sasl.password'] = self.kafka_password if (self.kafka_env == 'OCP'): options['sasl.mechanisms'] = 'SCRAM-SHA-512' options['ssl.ca.location'] = os.environ['PEM_CERT'] # Printing out producer config for debugging purposes print("[KafkaConsumer] - This is the configuration for the consumer:") print("[KafkaConsumer] - -------------------------------------------") print('[KafkaConsumer] - Bootstrap Server: {}'.format(options['bootstrap.servers'])) if (self.kafka_env != 'LOCAL'): # Obfuscate password if (len(self.kafka_password) > 3): obfuscated_password = self.kafka_password[0] + "*****" + self.kafka_password[len(self.kafka_password)-1] else: obfuscated_password = "******" print('[KafkaConsumer] - Security Protocol: {}'.format(options['security.protocol'])) print('[KafkaConsumer] - SASL Mechanism: {}'.format(options['sasl.mechanisms'])) print('[KafkaConsumer] - SASL Username: {}'.format(options['sasl.username'])) print('[KafkaConsumer] - SASL Password: {}'.format(obfuscated_password)) if (self.kafka_env == 'OCP'): print('[KafkaConsumer] - SSL CA Location: {}'.format(options['ssl.ca.location'])) print("[KafkaConsumer] - -------------------------------------------") # Create the consumer self.consumer = Consumer(options) self.consumer.subscribe([self.topic_name]) # Prints out and returns the decoded events received by the consumer def traceResponse(self, msg): msgStr = msg.value().decode('utf-8') print('[KafkaConsumer] - Consumed message from topic {} partition: [{}] at offset {}:'.format(msg.topic(), msg.partition(), msg.offset())) print('[KafkaConsumer] - key: {}, value: {}'.format(str(msg.key()), msgStr)) return msgStr # Polls for events until it finds an event where keyId=keyname def pollNextEvent(self, keyID, keyname): gotIt = False anEvent = {} while not gotIt: msg = self.consumer.poll(timeout=10.0) # Continue if we have not received a message yet if msg is None: continue if msg.error(): print("[KafkaConsumer] - Consumer error: {}".format(msg.error())) # Stop reading if we find end of partition in the error message if ("PARTITION_EOF" in msg.error()): gotIt= True continue msgStr = self.traceResponse(msg) # Create the json event based on message string formed by traceResponse anEvent = json.loads(msgStr) # If we've found our event based on keyname and keyID, stop reading messages if (anEvent["payload"][keyname] == keyID): gotIt = True return anEvent # Polls for events until it finds an event with same key def pollNextEventByKey(self, keyID): if (str(keyID) == ""): print("[KafkaConsumer] - Consumer error: Key is an empty string") return None gotIt = False anEvent = {} while not gotIt: msg = self.consumer.poll(timeout=10.0) # Continue if we have not 
received a message yet if msg is None: continue if msg.error(): print("[KafkaConsumer] - Consumer error: {}".format(msg.error())) # Stop reading if we find end of partition in the error message if ("PARTITION_EOF" in msg.error()): gotIt= True continue msgStr = self.traceResponse(msg) # Create the json event based on message string formed by traceResponse anEvent = json.loads(msgStr) # If we've found our event based on keyname and keyID, stop reading messages if (str(msg.key().decode('utf-8')) == keyID): gotIt = True return anEvent # Polls for events endlessly def pollEvents(self): gotIt = False while not gotIt: msg = self.consumer.poll(timeout=10.0) if msg is None: continue if msg.error(): print("[ERROR] - [KafkaConsumer] - Consumer error: {}".format(msg.error())) if ("PARTITION_EOF" in msg.error()): gotIt= True continue self.traceResponse(msg) def close(self): self.consumer.close()
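# Minimal usage sketch for the KafkaConsumer wrapper defined directly above,
# against a local unauthenticated broker; the broker address, topic, group id
# and key values are assumptions for illustration only.
example_consumer = KafkaConsumer(
    kafka_env='LOCAL',
    kafka_brokers='localhost:9092',
    topic_name='orders',
)
example_consumer.prepareConsumer(groupID='order-readers')
# Block until an event whose payload field 'orderID' equals 'order-123' arrives
order_event = example_consumer.pollNextEvent('order-123', 'orderID')
print(order_event)
example_consumer.close()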
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        self.broker_properties = {
            'BROKER_URL': 'PLAINTEXT://localhost:9092',
            'SCHEMA_REGISTRY': 'http://localhost:8081',
        }

        schema_registry = CachedSchemaRegistryClient({
            'url': self.broker_properties['SCHEMA_REGISTRY'],
            'ssl.ca.location': None,
            'ssl.certificate.location': None,
            'ssl.key.location': None
        })

        # TODO: Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(
                {
                    "bootstrap.servers": self.broker_properties['BROKER_URL'],
                    "group.id": "0"
                },
                schema_registry=schema_registry)
        else:
            self.consumer = Consumer({
                "bootstrap.servers": self.broker_properties['BROKER_URL'],
                "group.id": "0",
                "auto.offset.reset": "earliest"
            })

        # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about
        # how the `on_assign` callback should be invoked.
        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # TODO: If the topic is configured to use `offset_earliest` set the partition offset to
        # the beginning or earliest
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        # TODO: Assign the consumer the partitions
        # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=partition#confluent_kafka.Consumer.assign
        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        # TODO: Poll Kafka for messages. Make sure to handle any errors or exceptions.
        # Additionally, make sure you return 1 when a message is processed, and 0 when no message
        # is retrieved.
        # Poll this instance's consumer (the original referenced an undefined `c`).
        message = self.consumer.poll(self.consume_timeout)
        if message is None:
            print("no message received by consumer")
            return 0
        elif message.error() is not None:
            raise Exception(f"error from consumer {message.error()}")
        else:
            print(f"consumed message {message.key()}: {message.value()}")
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        # TODO: Cleanup the kafka consumer
        self.consumer.unsubscribe()
        self.consumer.close()
# Scope = chat:write
token = os.environ["SLACK_BOT_TOKEN"]
sc = WebClient(token)

# Set 'auto.offset.reset': 'smallest' if you want to consume all messages
# from the beginning of the topic
settings = {
    "bootstrap.servers": "localhost:9092",
    "group.id": "kafka-notify",
    "default.topic.config": {"auto.offset.reset": "largest"},
}
c = Consumer(settings)

# Topic = "SLACK-KAFKA"
c.subscribe(["SLACK-KAFKA"])

# TODO: Make bolts with Apache Storm
try:
    while True:
        msg = c.poll(0.1)  # read data
        time.sleep(5)
        if msg is None:
            continue
        elif not msg.error():
            print("Received message: {0}".format(msg.value()))
            if msg.value() is None:
                continue
            try:
                app_msg = json.loads(msg.value().decode())
class KafkaApp: def __init__(self, name, consumer_config, producer_config, consumer_batch_size=1, consumer_timeout=60): self.name = name self.consumer_config = consumer_config self.producer_config = producer_config self.processors = [] self.subs = {} self.logger = logging.getLogger(f'KafkaApp[{name}]') self.consumer_batch_size = consumer_batch_size self.consumer_timeout = consumer_timeout self.on_processed_callbacks = [] self.consumer = Consumer(consumer_config) self.producer = Producer(producer_config) signal.signal(signal.SIGINT, self.exit_gracefully) signal.signal(signal.SIGTERM, self.exit_gracefully) self.running = False def exit_gracefully(self, signum, frame): self.producer.flush() self.running = False self.logger.info('Exiting gracefully') sys.exit() @retry(wait_fixed=5000, retry_on_exception=retry_if_buffer_error_or_retriable) def _initialise_clients(self): """ Try to initialise until successful """ self.logger.info('Trying to initialise clients...') self.consumer = Consumer(self.consumer_config) self.producer = Producer(self.producer_config) topics = list(self.subs.keys()) self.logger.debug(f'Subscribing to topics: {topics}') self.consumer.subscribe(topics) self.logger.info('Clients initialised') @retry(wait_fixed=5000, retry_on_exception=retry_if_buffer_error_or_retriable) def _consume_messages(self): """ Try to consume until successful (unless error is fatal) """ return self.consumer.consume( num_messages=self.consumer_batch_size, timeout=self.consumer_timeout ) @retry(wait_fixed=5000, retry_on_exception=retry_if_buffer_error_or_retriable) def _produce_message(self, key, value, publish_to): """ Try to produce until successful (unless error is fatal) """ self.producer.produce( key=key, value=value, topic=publish_to ) self.producer.poll(0) @retry(wait_fixed=5000, retry_on_exception=retry_if_buffer_error_or_retriable) def _commit_message(self, msg): """ Try to commit until successful (unless error is fatal) """ self.consumer.commit(msg) def run(self): """ Main loop of kaf. Should never exit. Pseudo-code: inputs = consume() for input in inputs: outputs = process(input) for output in outputs: produce(output) commit(input) """ self.logger.debug('Run loop started') self._initialise_clients() # Loop forever self.running = True while self.running: iter_t0 = time.perf_counter() self.logger.debug('Iteration started') # Try to consume messages until successful self.logger.info( f"Consuming messages. 
Topics: {self.consumer.list_topics()}, Partitions: {self.consumer.assignment()}") msgs = self._consume_messages() if len(msgs) == 0: self.logger.info( f'No messages consumed for {self.consumer_timeout} seconds') else: self.logger.info(f'Consumed {len(msgs)} message(s)') for i, msg in enumerate(msgs): # Case 1a: msg has retriable error => don't commit # Case 1b: msg has fatal error => commit # Case 2: msg was processed successfully => commit # Case 3: msg processing failed => don't commit # Completely process each message before continuing to next try: i += 1 t0 = time.perf_counter() self.logger.info(f'Input message[{i}] processing started') error = msg.error() if error is not None: # Case 1a / 1b if error.code() == KafkaError._PARTITION_EOF: self.logger.info( f' {msg.topic()}[{msg.partition()}] reached end \ of offset {msg.offset()}' ) else: self.logger.error(error) else: # Call user functions process_output = self._process_message(msg) # Materialise output, so that user functions are forced to complete process_output = list(process_output) # Publish results for j, (value, key, publish_to) in enumerate(process_output): j += 1 self._produce_message( key=key, value=value, publish_to=publish_to ) self.logger.info( f'Output message[{j}] produced to topic "{publish_to}" on broker(s) {self.producer_config["bootstrap.servers"]}') # We don't care if callback raises an Exception t1 = time.perf_counter() for callback in self.on_processed_callbacks: try: callback(msg, t1 - t0) except Exception as e_inner: self.logger.exception(e) except Exception as e: self.logger.error(f'An error occured in run loop: {e}') self.logger.exception(e) finally: try: self._commit_message(msg) self.logger.info(f'Input message[{i}] committed') except Exception as e: self.logger.error(f'Input message[{i}] not committed') self.logger.exception(e) iter_t1 = time.perf_counter() self.logger.debug( f'Iteration completed in {iter_t1 - iter_t0} seconds') def _process_message(self, msg): """ Process a single message by calling all subscribed user functions """ input_bytes = msg.value() topic = msg.topic() subs = self._get_subs(topic) self.logger.debug( f'Found {len(subs)} function(s) subscribed to topic "{topic}"') for func, publish_to, accepts, returns in subs: try: input_obj = self._parse(input_bytes, accepts) outputs = func(input_obj) self.logger.info( f'User function "{func.__name__}" completed successfully') for output_obj, key in outputs: if publish_to is None: continue key = self._keyify(key) output_bytes = self._serialize(output_obj, returns) yield output_bytes, key, publish_to except Exception as e: self.logger.error( f'User function "{func.__name__}" raised an exception: {e}') self.logger.exception(e) def _parse(self, input_bytes, accepts): if accepts == 'bytes': return input_bytes elif accepts == 'json': return json.loads(input_bytes) else: raise TypeError( f'Unsupported value for accepts parameter: {accepts}') def _keyify(self, key): if key is None: return key else: return bytes(key) def _serialize(self, output_obj, returns): """ Serialize an output from a user function, i.e. turn it into bytes. 
""" if returns == 'bytes': # Assert that already serialized if type(output_obj) != bytes: raise TypeError( f'User function should return bytes, but returned {type(output_obj)}') return output_obj elif returns == 'json': try: return json.dumps(output_obj).encode('utf-8') except: raise TypeError( f'User function returned value that can not be serialized to JSON: {output_obj}') else: raise TypeError( f'User function returned unsupported type: {type(output_obj)}') def _get_subs(self, topic): """ Returns a list of user functions subscriptions on a a topic. """ return self.subs.get(topic) or [] def process(self, topic, publish_to=None, accepts='bytes', returns='bytes'): """ Decorator for user functions that processes a single event. The value of the event is passed to the user function. - The accepts parameter can be set to 'bytes' or 'json' - The returns parameter can be set to 'bytes' or 'json' The user function should return results as `yield value, key`, where the type of value depends on the returns parameter (either raw bytes or something that can be passed to json.dumps). The key should be either None or bytes. """ assert(accepts in ['bytes', 'json']) assert(returns in ['bytes', 'json']) def process_decorator(func): sub = (func, publish_to, accepts, returns) self.subs.setdefault(topic, []).append(sub) return func return process_decorator def on_processed(self, func): """ Decorator for user callbacks """ self.on_processed_callbacks.append(func) return func
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
logger.addHandler(handler)

# Create Consumer instance
# Hint: try debug='fetch' to generate some log messages
c = Consumer(conf, logger=logger)


def print_assignment(consumer, partitions):
    print('Assignment:', partitions)


log.info("subscribing to the topic : " + str(topics))
# Subscribe to topics
c.subscribe(topics, on_assign=print_assignment)

log.info("Reading msg from the topic : " + str(topics))
# Read messages from Kafka, print to stdout
try:
    while True:
        msg = c.poll(timeout=1.0)
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())
        else:
            # Proper message
            sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                             (msg.topic(), msg.partition(), msg.offset(),
                              str(msg.key())))
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest # # # DONE: Configure the broker properties below. Make sure to reference the project README # and use the Host URL for Kafka and Schema Registry! # # self.broker_properties = { "bootstrap.servers": "PLAINTEXT://localhost:9092", "group.id": "opt-group" # # DONE # } # DONE: Create the Consumer, using the appropriate type. if is_avro is True: self.broker_properties[ "schema.registry.url"] = "http://localhost:8081" self.consumer = AvroConsumer(self.broker_properties) else: self.consumer = Consumer(self.broker_properties) # # # DONE: Configure the AvroConsumer and subscribe to the topics. Make sure to think about # how the `on_assign` callback should be invoked. # # self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign) def on_assign(self, consumer, partitions): """Callback for when topic assignment takes place""" # DONE: If the topic is configured to use `offset_earliest` set the partition offset to # the beginning or earliest logger.info("on_assign...") for partition in partitions: try: if self.offset_earliest: partition.offset = confluent_kafka.OFFSET_BEGINNING except: logger.info("something wrong with OFFSET_BEGINNING...") # # # DONE # # logger.info("partitions assigned for %s", self.topic_name_pattern) consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" # # # DONE: Poll Kafka for messages. Make sure to handle any errors or exceptions. # Additionally, make sure you return 1 when a message is processed, and 0 when no message # is retrieved. # # logger.info("consume message...") message = self.consumer.poll(1.0) if message is None: logger.info("no message") return 0 elif message.error() is not None: logger.info(f"error from consumer {message.error()}") return 0 elif message.value() is None: logger.info("empty message") return 0 else: logger.info(f"consumed message {message.key()}: {message.value()}") self.message_handler(message) return 1 def close(self): """Cleans up any open kafka consumers""" # # # DONE: Cleanup the kafka consumer # # self.consumer.close()
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest # # # Done: Configure the broker properties below. Make sure to reference the project README # and use the Host URL for Kafka and Schema Registry! # # self.broker_properties = { 'BROKER_URL': 'localhost:9092', 'SCHEMA_REGISTRY_URL': 'localhost:8081', 'REST_PROXY': 'localhost:8082' } # Done: Create the Consumer, using the appropriate type. if is_avro is True: self.broker_properties["schema.registry.url"] = "http://localhost:8081" schema_registry = CachedSchemaRegistryClient( self.broker_properties["schema.registry.url"]) self.consumer = AvroConsumer( {"bootstrap.servers": self.broker_properties.get("BROKER_URL"), "group.id":f"{self.topic_name_pattern}"}, schema_registry = schema_registry) else: self.consumer = Consumer( {"bootstrap.servers": self.broker_properties.get("BROKER_URL"), "group.id": "0"}) # # # Done: Configure the AvroConsumer and subscribe to the topics. Make sure to think about # how the `on_assign` callback should be invoked. # # self.consumer.subscribe([f"^{self.topic_name_pattern}"], on_assign=self.on_assign) def on_assign(self, consumer, partitions): """Callback for when topic assignment takes place""" # Done: If the topic is configured to use `offset_earliest` set the partition offset to # the beginning or earliest logger.info("on_assign is incomplete - skipping") for partition in partitions: partition.offset = OFFSET_BEGINNING logger.info("partitions assigned for %s", self.topic_name_pattern) consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" # # # Done: Poll Kafka for messages. Make sure to handle any errors or exceptions. # Additionally, make sure you return 1 when a message is processed, and 0 when no message # is retrieved. # # message = self.consumer.poll(1.0) ret_code = 0 if message is None: logger.debug("no message received by consumer") ret_code = 0 elif message.error() is not None: logger.debug(f"error from consumer {message.error()}") ret_code = 0 else: logger.info(f"consumed meaage, {message.topic()}") #logger.info(f"consumed message, {message.key()}: {message.value()}") ret_code = 1 self.message_handler(message) #await asyncio.sleep(self.sleep_secs) #logger.info("_consume is incomplete - skipping") return ret_code def close(self): """Cleans up any open kafka consumers""" # # # Done: Cleanup the kafka consumer # # self.consumer.close()
Print published run information from Kafka stream
"""


def parseMessage(buf):
    buf = bytearray(buf)
    runInfo = ISISStream.RunInfo.RunInfo.GetRootAsRunInfo(buf, 0)
    start_time = datetime.datetime.fromtimestamp(runInfo.StartTime()).strftime('%Y-%m-%d %H:%M:%S')
    string_to_print = "Run number: " + str(runInfo.RunNumber()) + \
                      ", Start time: " + start_time + \
                      ", Instrument name: " + runInfo.InstName() + \
                      ", Stream offset: " + str(runInfo.StreamOffset())
    print string_to_print


if __name__ == "__main__":
    c = Consumer({'bootstrap.servers': 'sakura',
                  'group.id': 'python-read-run-info',
                  'default.topic.config': {'auto.offset.reset': 'smallest'},
                  'enable.auto.commit': False})
    c.subscribe(['test_run_topic'])
    running = True
    while running:
        msg = c.poll(1000)
        if not msg.error():
            parseMessage(msg.value())
        elif msg.error().code() != KafkaError._PARTITION_EOF:
            print(msg.error())
            running = False
        else:
            running = False
    c.close()
class KafkaConsumerWorker(BaseWorker): topic_name = None consumer_name = None consumer_settings = {} commit_on_complete = True async_commit = True poll_timeout = 0.01 sleep_time = 0.05 timestamp_fields = ['timestamp'] decimal_fields = [] boolean_fields = [] def setup(self): self.consumer = Consumer(**self.get_consumer_settings()) self.serializer = self.get_message_serializer() self.set_topic() def teardown(self): self.consumer.close() def get_topic_name(self): if self.topic_name is None: raise NotImplementedError return self.topic_name def get_consumer_name(self): if self.consumer_name is None: raise NotImplementedError return self.consumer_name def get_broker_url(self): broker_url = settings.BROKER_URL if broker_url is None: raise NotImplementedError return broker_url def get_zookeeper_url(self): zookeeper_url = settings.ZOOKEEPER_URL if zookeeper_url is None: raise NotImplementedError return zookeeper_url def get_consumer_settings(self): broker_url = self.get_broker_url() logger.debug('connecting to kafka: ' + broker_url) consumer_name = self.get_consumer_name() logger.debug('using group id: ' + consumer_name) initial_settings = { 'api.version.request': True, 'broker.version.fallback': '0.9.0', 'client.id': 'JanglConsumer', 'bootstrap.servers': broker_url, 'group.id': consumer_name, 'default.topic.config': {'auto.offset.reset': 'earliest'}, 'enable.auto.commit': False, 'on_commit': self.on_commit, 'session.timeout.ms': 10000, 'heartbeat.interval.ms': 1000, } return generate_client_settings(initial_settings, self.consumer_settings) def get_message_serializer(self): schema_registry_url = self.get_schema_registry_url() logger.debug('loading schema registry: ' + schema_registry_url) schema_client = CachedSchemaRegistryClient(url=schema_registry_url) return MessageSerializer(schema_client) def get_schema_registry_url(self): schema_microservice = settings.SCHEMA_MICROSERVICE if schema_microservice: schema_registry_url = get_service_url(schema_microservice) else: schema_registry_url = settings.SCHEMA_REGISTRY_URL if schema_registry_url is None: raise NotImplementedError return schema_registry_url def set_topic(self): topic_name = self.get_topic_name() logger.debug('set kafka topic: ' + topic_name) self.consumer.subscribe([topic_name], on_assign=self.on_assign, on_revoke=self.on_revoke) def on_assign(self, consumer, partitions): logger.debug('partitions assigned: {}'.format(partitions)) consumer.assign(partitions) def on_revoke(self, consumer, partitions): logger.debug('partitions revoked: {}'.format(partitions)) try: consumer.commit(async=False) except KafkaException: pass consumer.unassign() def on_commit(self, err, partitions): if err is None: logger.debug('commit done: {}'.format(partitions)) else: logger.error('commit error: {} - {}'.format(err, partitions)) def handle(self): message = self.consumer.poll(timeout=self.poll_timeout) if message is not None: if message.error(): if message.error().code() == KafkaError._PARTITION_EOF: # End of partition event logger.info('%% %s [%d] reached end at offset %d\n' % (message.topic(), message.partition(), message.offset())) elif message.error(): raise KafkaException(message.error()) else: message = DecodedMessage(self.serializer, message) message = self.parse_message(message) self.consume_message(message) if self.commit_on_complete: self.commit() self.done() else: self.wait() def parse_message(self, message): for field in self.timestamp_fields: if field in message: try: message[field] = datetime.fromtimestamp(message[field], utc) except ValueError: 
try: message[field] = datetime.fromtimestamp(message[field]/1000, utc) except TypeError: pass except TypeError: pass for field in self.decimal_fields: if field in message: try: message[field] = decimal.Decimal(message[field]) except (TypeError, decimal.InvalidOperation): pass for field in self.boolean_fields: if field in message: try: message[field] = bool(message[field]) except TypeError: pass return message def commit(self): if not self.consumer_settings.get('enable.auto.commit'): self.consumer.commit(async=self.async_commit) def consume_message(self, message): pass
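# Hypothetical subclass sketch showing how KafkaConsumerWorker above is meant
# to be specialised; the topic, consumer name and field lists are assumptions,
# not values from the original project.
class ExampleEventWorker(KafkaConsumerWorker):
    topic_name = 'example-events'
    consumer_name = 'example-event-worker'
    timestamp_fields = ['timestamp', 'created_at']
    decimal_fields = ['amount']

    def consume_message(self, message):
        # message arrives already decoded and type-coerced by parse_message()
        logger.debug('consumed event: {}'.format(message))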
#######################################################################
# KAFKA CONSUMER MICROSERVICE
# FRANKO ORTIZ
# KAFKA CONFLUENT PYTHON CLIENT
#######################################################################
# FAKER + RANDOM FOR CREATE SYNTHETIC FROM SOURCE KAFKA TOPIC
fake = Faker()

c = Consumer({
    'bootstrap.servers': 'broker:9092',
    'group.id': 'consumer9',
    'auto.offset.reset': 'earliest'
})
c.subscribe(['sciencesourcetopic'])

es = Elasticsearch(
    ['es01:9200', 'es02:9200', 'es03:9200'],
    sniff_on_start=True,            # sniff before doing anything
    sniff_on_connection_fail=True,  # refresh nodes after a node fails to respond
    sniffer_timeout=60)             # and also every 60 seconds

#######################################################################
# USER DEFINE FUNCTIONS


def random_nat_prov_id(rng):
    prov_id = ''
    for _ in range(rng):
from confluent_kafka import Consumer, KafkaError

c = Consumer({
    'bootstrap.servers': 'ec2-52-200-128-8.compute-1.amazonaws.com:9093',
    'group.id': '10',
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    }
})

c.subscribe(['test_kafka'])

while True:
    msg = c.poll(1.0)
    # decode once up front so the string comparisons below are consistent
    msgValue = msg.value().decode('utf-8')
    if msgValue != 'Broker: No more messages':
        f = open("testcase.txt", "r")
        fline = f.readline()
        for x in fline:
            if msgValue == 'Broker: No more messages':
                print('No messages at this point')
                break
            print('Received: ' + msgValue + "; Expected: " + x)
            msg = c.poll(1.0)
            msgValue = msg.value().decode('utf-8')
            # print('Received message: {}'.format(msg.value().decode('utf-8')))
        continue
    else:
        print('No messages at this point')
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest self.BROKER_URL = 'PLAINTEXT://localhost:9092' self.SCHEMA_REGISTRY_URL = 'http://localhost:8081' self.broker_properties = { "bootstrap.servers": self.BROKER_URL, "group.id": "0" } if is_avro is True: schema_registry = CachedSchemaRegistryClient( {"url": self.SCHEMA_REGISTRY_URL}) self.consumer = AvroConsumer(self.broker_properties, schema_registry=schema_registry) else: self.consumer = Consumer(self.broker_properties) pass self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign) def on_assign(self, consumer, partitions): for partition in partitions: partition.offset = OFFSET_BEGINNING consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" while True: message = self.consumer.poll(timeout=1.0) if message is None: return 0 elif message.error(): logger.error(message.error()) else: if self.topic_name_pattern == 'com.opt.weather': logger.info(message.value()) return 1 def __del__(self): pass def close(self): self.consumer.close() self.__del__()
class InventoryEventsConsumer: """Inventory events consumer.""" def __init__(self): """Create a Inventory Events Consumer.""" self.consumer = Consumer({ 'bootstrap.servers': INSIGHTS_KAFKA_ADDRESS, 'group.id': GROUP_ID, 'enable.auto.commit': False }) # Subscribe to topic self.consumer.subscribe([INVENTORY_EVENTS_TOPIC]) self.event_type_map = { 'delete': self.host_delete_event, 'created': self.host_create_update_events, 'updated': self.host_create_update_events } self.prefix = 'PROCESSING INVENTORY EVENTS' def __iter__(self): return self def __next__(self): msg = self.consumer.poll() if msg is None: raise StopIteration return msg def run(self): """Initialize Consumer.""" for msg in iter(self): if msg.error(): print(msg.error()) raise KafkaException(msg.error()) try: msg = json.loads(msg.value().decode("utf-8")) event_type = msg['type'] if event_type in self.event_type_map.keys(): handler = self.event_type_map[event_type] handler(msg) else: LOG.info('Event Handling is not found for event %s - %s', event_type, self.prefix) except json.decoder.JSONDecodeError: LOG.error('Unable to decode kafka message: %s - %s', msg.value(), self.prefix) except Exception as err: LOG.error( 'An error occurred during message processing: %s in the system %s created from account: %s - %s', repr(err), msg['host']['id'], msg['host']['account'], self.prefix, ) finally: self.consumer.commit() LOG.warning("Stopping inventory consumer") self.consumer.close() def host_delete_event(self, msg): """Process delete message.""" self.prefix = "PROCESSING DELETE EVENT" host_id = msg['id'] insights_id = msg['insights_id'] with app.app_context(): LOG.info( 'Deleting performance profile records with insights_id %s - %s', insights_id, self.prefix) rows_deleted = db.session.query( System.id).filter(System.inventory_id == host_id).delete() if rows_deleted > 0: LOG.info('Deleted host from inventory with id: %s - %s', host_id, self.prefix) db.session.commit() def host_create_update_events(self, msg): """ Process created/updated message ( create system record, store new report )""" self.prefix = "PROCESSING Create/Update EVENT" if 'is_ros' in msg['platform_metadata']: self.process_system_details(msg) def process_system_details(self, msg): """ Store new system information (stale, stale_warning timestamp) and return internal DB id""" host = msg['host'] performance_record = get_performance_profile( msg['platform_metadata']['url']) if performance_record: performance_utilization = self._calculate_performance_utilization( performance_record, host) with app.app_context(): account = get_or_create(db.session, RhAccount, 'account', account=host['account']) system = get_or_create( db.session, System, 'inventory_id', account_id=account.id, inventory_id=host['id'], display_name=host['display_name'], fqdn=host['fqdn'], cloud_provider=host['system_profile']['cloud_provider'], instance_type=performance_record.get('instance_type'), stale_timestamp=host['stale_timestamp']) get_or_create(db.session, PerformanceProfile, ['system_id', 'report_date'], system_id=system.id, performance_record=performance_record, performance_utilization=performance_utilization, report_date=datetime.datetime.utcnow().date()) # Commit changes db.session.commit() LOG.info( "Refreshed system %s (%s) belonging to account: %s (%s) via report-processor", system.inventory_id, system.id, account.account, account.id) def _calculate_performance_utilization(self, performance_record, host): MAX_IOPS_CAPACITY = 16000 memory_utilized = (float(performance_record['mem.util.used']) / 
float(performance_record['mem.physmem'])) * 100 cpu_utilized = self._calculate_cpu_score(performance_record) cloud_provider = host['system_profile']['cloud_provider'] if cloud_provider == 'aws': MAX_IOPS_CAPACITY = 16000 if cloud_provider == 'azure': MAX_IOPS_CAPACITY = 20000 io_utilized = (float(performance_record['disk.all.total']) / float(MAX_IOPS_CAPACITY)) * 100 performance_utilization = { 'memory': int(memory_utilized), 'cpu': int(cpu_utilized), 'io': int(io_utilized) } return performance_utilization def _calculate_cpu_score(self, performance_record): idle_cpu_percent = ( (float(performance_record['kernel.all.cpu.idle']) * 100) / int(performance_record['total_cpus'])) cpu_utilized_percent = 100 - idle_cpu_percent return cpu_utilized_percent
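# A small worked example of the utilization math implemented above, using made-up
# PCP-style values (these numbers are illustrative only, not from any real
# performance record):
record = {
    'mem.util.used': 6144.0,      # MB in use
    'mem.physmem': 8192.0,        # MB installed
    'kernel.all.cpu.idle': 1.2,   # idle time summed across CPUs
    'total_cpus': 2,
    'disk.all.total': 3200.0,     # IOPS
}
memory_utilized = (record['mem.util.used'] / record['mem.physmem']) * 100          # 75.0
idle_cpu_percent = (record['kernel.all.cpu.idle'] * 100) / record['total_cpus']    # 60.0
cpu_utilized = 100 - idle_cpu_percent                                              # 40.0
io_utilized = (record['disk.all.total'] / 16000) * 100                             # 20.0 with the AWS cap
print({'memory': int(memory_utilized), 'cpu': int(cpu_utilized), 'io': int(io_utilized)})
# -> {'memory': 75, 'cpu': 40, 'io': 20}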
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest # # # TODO: Configure the broker properties below. Make sure to reference the project README # and use the Host URL for Kafka and Schema Registry! # # self.broker_properties = { "group.id": f"{topic_name_pattern}", "default.topic.config": { "auto.offset.reset": "earliest" }, 'bootstrap.servers': BOOTSTRAP_SERVERS, } # TODO: Create the Consumer, using the appropriate type. if is_avro is True: self.broker_properties["schema.registry.url"] = SCHEMA_REGISTRY self.consumer = AvroConsumer(self.broker_properties) else: self.consumer = Consumer(self.broker_properties) # # # TODO: Configure the AvroConsumer and subscribe to the topics. Make sure to think about # how the `on_assign` callback should be invoked. # # self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign) def on_assign(self, consumer, partitions): """Callback for when topic assignment takes place""" # TODO: If the topic is configured to use `offset_earliest` set the partition offset to # the beginning or earliest for partition in partitions: if self.offset_earliest: partition.offset = OFFSET_BEGINNING logger.info("partitions assigned for %s", self.topic_name_pattern) consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" # # # TODO: Poll Kafka for messages. Make sure to handle any errors or exceptions. # Additionally, make sure you return 1 when a message is processed, and 0 when no message # is retrieved. # # try: message = self.consumer.poll(timeout=self.consume_timeout) if message is None: return 0 elif message.error() is not None: return 0 self.message_handler(message) return 1 except SerializerError as e: logger.error("Message deserialization failed for %s: %s" % (message, e)) return 0 def close(self): """Cleans up any open kafka consumers""" # # # TODO: Cleanup the kafka consumer # # self.consumer.close()
settings = { 'bootstrap.servers': 'kafka:29092', 'group.id': 'mygroup', 'client.id': 'client-1', 'enable.auto.commit': True, 'session.timeout.ms': 6000, 'default.topic.config': { 'auto.offset.reset': 'smallest' } } c = Consumer(settings) c.subscribe([vehicleConstants.KAFKA_TOPIC_VEHICLE_LOCATION_UPDATE]) try: while True: msg = c.poll(0.1) if msg is None: continue elif not msg.error(): print('Received message: {0}'.format(msg.value())) elif msg.error().code() == KafkaError._PARTITION_EOF: print('End of partition reached {0}/{1}'.format( msg.topic(), msg.partition())) else: print('Error occurred: {0}'.format(msg.error().str())) print(msg) persistKafkaMsgInDB(msg)
lv_bootstrap_servers = 'procamp-cluster-m.us-east1-b.c.bigdata-procamp-1add8fad.internal' else: logger.info('******* Set local kafka broker') lv_bootstrap_servers = 'localhost:9092' settings = { 'bootstrap.servers': lv_bootstrap_servers, 'group.id': 'group-1', 'client.id': 'client-1', 'enable.auto.commit': False, 'session.timeout.ms': 6000, # 'default.topic.config': {'auto.offset.reset': 'smallest'} 'default.topic.config': {'auto.offset.reset': 'latest'} } c = Consumer(settings) c.subscribe(['gcp.orders.fct.btcusd.0']) full_df = pd.DataFrame({'data.id': pd.Series([], dtype='int'), 'data.id_str': pd.Series([], dtype='str'), 'data.order_type': pd.Series([], dtype='int'), 'data.datetime': pd.Series([], dtype='str'), 'data.microtimestamp': pd.Series([], dtype='str'), 'data.amount': pd.Series([], dtype='float'), 'data.amount_str': pd.Series([], dtype='str'), 'data.price': pd.Series([], dtype='float'), 'data.price_str': pd.Series([], dtype='str'), 'channel': pd.Series([], dtype='str'), 'event': pd.Series([], dtype='str')}) try: cnt = 0
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest self.broker_properties = { "group.id": "consumer_group", "bootstrap.servers": "PLAINTEXT://localhost:9092", "auto.offset.reset": "earliest" } if is_avro is True: self.broker_properties[ "schema.registry.url"] = "http://localhost:8081" self.consumer = AvroConsumer(self.broker_properties) else: self.consumer = Consumer(self.broker_properties) self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign) def on_assign(self, consumer, partitions): """Callback for when topic assignment takes place""" for partition in partitions: if self.offset_earliest is True: partition.offset = confluent_kafka.OFFSET_BEGINNING logger.info("partitions assigned for %s", self.topic_name_pattern) consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" message = self.consumer.poll(self.consume_timeout) if message is None: return 0 elif message.error() is not None: print(f"error from consumer: {message.error()}") return 0 else: print( f"consumed message with {message.key()} and value {message.value()}" ) self.message_handler(message) return 1 def close(self): """Cleans up any open kafka consumers""" logger.debug("closing consumer") self.consumer.close()
if token is None: print('\n\n*******\nYou need to set your Slack API token in the SLACK_API_TOKEN environment variable\n\nExiting.\n\n*******\n') sys.exit(1) sc = SlackClient(token) # Set 'auto.offset.reset': 'smallest' if you want to consume all messages # from the beginning of the topic settings = { 'bootstrap.servers': 'localhost:9092', 'group.id': 'python_kafka_notify.py', 'default.topic.config': {'auto.offset.reset': 'largest'} } c = Consumer(settings) c.subscribe(['UNHAPPY_PLATINUM_CUSTOMERS']) try: while True: msg = c.poll(0.1) time.sleep(5) if msg is None: continue elif not msg.error(): print('Received message: {0}'.format(msg.value())) if msg.value() is None: continue try: app_msg = json.loads(msg.value().decode()) except: app_msg = json.loads(msg.value())
def kafka_confluent_reader( path, topics=None, topic=None, # deprecated identity=None, raw=False, poll_timeout=1.0, **kwargs ): """ Reads events from Kafka. Kafka URIs look like: kafka:///b1:9092,b2:9092?topics=topic1,topic2&identity=consumer_group& &auto.commit.interval.ms=1000... This uses the Consumer from the librdkafka backed confluent-kafka python library. You may pass any configs that the librdkafka Consumer take as keyword arguments via URI query params. auto.commit.interval.ms is by default 5 seconds. If enable.auto.commit is True (the default), then messages will be marked as done based on the auto.commit.interval.ms time period. This has the downside of committing message offsets before work might be actually complete. E.g. if inserting into MySQL, and the process dies somewhere along the way, it is possible that message offsets will be committed to Kafka for messages that have not been inserted into MySQL. Future work will have to fix this problem somehow. Perhaps a callback? The 'topic' parameter is provided for backwards compatibility. It will be used if topics is not given. Arguments: *path (str): Comma separated list of broker hostname:ports. *topics (list): List of topics to subscribe to. *topic (str): Deprecated topic to subscribe to. Use topics instead. Ignored if topics is provided. *identity (str): Used as the Kafka consumer group.id, and the prefix of the Kafka client.id. If not given, a new unique identity will be created. *raw (bool): If True, the generator returned will yield a stream of strings, else a stream of Events. Default: False. *poll_timeout (float) Timeout in seconds to use for call to consumer.poll(). poll will only block for this long if there are no messages. Default: 1.0. """ if not topics and not topic: raise ValueError( 'Cannot consume from Kafka without providing topics.' ) from confluent_kafka import Consumer, KafkaError import signal # Use topics as an array if given, else just use topic topics = topics.split(',') if topics else [topic] # Get kafka client_id and group_id based on identity. (client_id, group_id) = kafka_ids(identity) # Remove anything that we know is not going to be a valid # Kafka Consumer parameter from kwargs and then set some required # configs. eventlogging_keys = ('port', 'hostname', 'uri') kafka_args = {k: kwargs[k] for k in kwargs if k not in eventlogging_keys} kafka_args['bootstrap.servers'] = path.strip('/') kafka_args['group.id'] = group_id kafka_args['client.id'] = client_id kafka_consumer = Consumer(**kafka_args) logging.info( 'Consuming topics %s from Kafka in group %s as %s', topics, group_id, client_id ) # Callback for logging during consumer rebalances def log_assign(consumer, partitions): logging.info('Partition assignment change for %s. Now consuming ' 'from %s partitions: %s', client_id, len(partitions), partitions) # Subscribe to list of topics. kafka_consumer.subscribe(topics, on_assign=log_assign) # Define a generator to read from the Consumer instance. def consume(consumer, timeout=1.0): # Make sure we close the consumer on SIGTERM. # SIGINT should be caught by the finally in consume(). def shutdown_handler(_signo, _stack_frame): logging.info('Caught SIGTERM, closing KafkaConsumer %s ' 'to commit outstanding offsets.', client_id) consumer.close() sys.exit(0) signal.signal(signal.SIGTERM, shutdown_handler) # Wrap the poll loop in a try/finally. try: while True: # Poll for messages message = consumer.poll(timeout=timeout) # If no message was found in timeout, poll again. 
if not message: continue # Else if we encountered a KafkaError, log and continue. elif message.error(): # _PARTITION_EOF is pretty normal, just log at debug if message.error().code() == KafkaError._PARTITION_EOF: logging.debug( 'KafkaConsumer %s consuming %s [%d] ' 'reached end at offset %d\n' % ( client_id, message.topic(), message.partition(), message.offset() ) ) # Else this is a real KafkaError, log at error. else: logging.error(message.error()) # Else we got a proper message, yield it. else: yield message.value() except BaseException as e: error_message = 'Exception while KafkaConsumer %s consuming' % ( client_id ) # Add more info if message is defined. if message: error_message += ' from %s [%s] at offset %s' % ( message.topic(), message.partition(), message.offset(), ) logging.error(error_message) if (type(e) != KeyboardInterrupt): raise(e) finally: logging.info('Finally closing KafkaConsumer %s ' 'to commit outstanding offsets.', client_id) consumer.close() # Return a stream of message values. return stream(consume(kafka_consumer, poll_timeout), raw)
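# kafka_confluent_reader() above is normally driven from a kafka:/// URI, but it can
# also be called directly. A hypothetical invocation (broker addresses, topic, and
# group identity are placeholders; extra keyword arguments are forwarded to the
# librdkafka Consumer configuration, and note the code expects `topics` as a
# comma-separated string despite the docstring):
events = kafka_confluent_reader(
    'kafka-broker-1:9092,kafka-broker-2:9092',
    topics='eventlogging_EventError',     # placeholder, comma-separated topic list
    identity='example-consumer-group',    # used for group.id and client.id prefix
    raw=True,                             # yield plain strings instead of Events
    **{'auto.commit.interval.ms': 1000},  # forwarded to librdkafka
)
for event in events:
    print(event)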
# Create Consumer instance # 'auto.offset.reset=earliest' to start reading from the beginning of the # topic if no committed offsets exist c = Consumer({ 'bootstrap.servers': conf['bootstrap.servers'], 'sasl.mechanisms': 'PLAIN', 'security.protocol': 'SASL_SSL', 'sasl.username': conf['sasl.username'], 'sasl.password': conf['sasl.password'], 'group.id': 'python_example_group_1', 'auto.offset.reset': 'earliest' }) # Subscribe to topic c.subscribe([topic]) # Process messages total_count = 0 try: while True: print("Waiting for message or event/error in poll()") msg = c.poll(1.0) if msg is None: # No message available within timeout. # Initial message consumption may take up to # `session.timeout.ms` for the consumer group to # rebalance and start consuming continue elif not msg.error(): # Check for Kafka message
# after each produce() call to trigger delivery report callbacks. p.flush(10) c = Consumer({ 'bootstrap.servers': '<ccloud bootstrap servers>', 'broker.version.fallback': '0.10.0.0', 'api.version.fallback.ms': 0, 'sasl.mechanisms': 'PLAIN', 'security.protocol': 'SASL_SSL', 'sasl.username': '******', 'sasl.password': '******', 'group.id': str(uuid.uuid1()), # this will create a new consumer group on each invocation. 'auto.offset.reset': 'earliest' }) c.subscribe(['python-test-topic']) try: while True: msg = c.poll(0.1) # Wait for message or event/error if msg is None: # No message available within timeout. # Initial message consumption may take up to `session.timeout.ms` for # the group to rebalance and start consuming. continue if msg.error(): # Errors are typically temporary, print error and continue. print("Consumer error: {}".format(msg.error())) continue print('consumed: {}'.format(msg.value()))
class ConfluentKafkaReader(object): def __init__(self, host, port, group, topic, buffer_size, reconnect_wait_time=2): """ Initialize Kafka reader """ logging.info("Initializing Confluent Kafka Consumer") self.host = host self.port = str(port) self.group = group self.topic = [topic] self.buffer_size = buffer_size self.reconnect_wait_time = reconnect_wait_time self.reconnect_retries = 0 self.max_reconnect_retries = 10 # TODO: implement config parameter self.buffer = [] # Initialized on read self.consumer = None def on_assign(self, consumer, partitions): # for p in partitions: # p.offset=-2 # consumer.assign(partitions) logging.debug('on_assignment callback...') logging.info('Assignment: %s', partitions) def _connect(self): connection = {'bootstrap.servers': self.host+":"+self.port, 'group.id': self.group, 'session.timeout.ms': 6000, 'default.topic.config': {'auto.offset.reset': 'largest'}} logging.info("Connecting to Kafka at %s...", connection) self.consumer = Consumer(**connection) self.consumer.subscribe(self.topic, on_assign=self.on_assign) def read(self): """ Read from Kafka. Reconnect on error. """ try: self._connect() msgcn = 0 while True: msg = self.consumer.poll(timeout=1.0) if msg is None: continue if msg.error(): # Error or event if msg.error().code() == KafkaError._PARTITION_EOF: # End of partition event logging.debug('Catching KafkaError._PARTITION_EOF') logging.error('%s [%d] reached end at offset %d\n', msg.topic(), msg.partition(), msg.offset()) logging.error('%s [%d] at offset %d with key %s:\n', msg.topic(), msg.partition(), msg.offset(), str(msg.key())) break elif msg.error(): # Error # TODO : extend exception handling scope as we will end here # for a lot of reasons ! logging.debug('Catching other errors...') logging.error("Kafka error: %s.", msg.error()) logging.error("Trying to reconnect to %s:%s", self.host, self.port) self.reconnect_retries += 1 time.sleep(self.reconnect_wait_time) if self.reconnect_retries >= self.max_reconnect_retries: logging.error("Max reconnection attempt limit reached (%d). Aborting", self.max_reconnect_retries) break else: self.consumer.close() self._connect() pass #raise KafkaException(msg.error()) else: # Proper message logging.error('%s [%d] at offset %d with key %s:\n', msg.topic(), msg.partition(), msg.offset(), str(msg.key())) (self.buffer).append(msg.value().decode('utf-8').rstrip('\n')) # otherwise the #writer will add extra \n msgcn += 1 #self.consumer.commit(async=False) if msgcn >= self.buffer_size: logging.debug("Read buffer [%d] reached.",self.buffer_size) break except KeyboardInterrupt: logging.info('Aborted by user\n') # Close down consumer to commit final offsets. self.consumer.close() return(self.buffer)
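# A hypothetical driver for the ConfluentKafkaReader above; host, port, group and
# topic values are placeholders:
reader = ConfluentKafkaReader(
    host='localhost', port=9092, group='example-group',
    topic='example-topic', buffer_size=500)
# read() blocks until buffer_size messages have been buffered (or the read is
# interrupted/aborted) and then returns the buffered message values.
for line in reader.read():
    print(line)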
# See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md props = { 'bootstrap.servers': KAFKA_BROKER_URL, # Where is the Kafka cluster? (replace with the Kafka cluster to connect to) 'group.id': STUDENT_ID, # Name of the consumer group (replace with your student ID) 'auto.offset.reset': 'earliest', # Start reading from the earliest offset 'session.timeout.ms': 6000, 'error_cb': error_cb # Set the callback function that receives error messages } # Step 2. Create a Kafka Consumer instance consumer = Consumer(props) # Step 3. Specify the name of the topic to subscribe to topicName = 'ak02.hw.translog' # Step 4. Have the consumer subscribe to the specified topic on the Kafka cluster consumer.subscribe( [topicName], on_assign=seek_to_begin) # ** Tip: move the offset back to the beginning every time this program restarts # Step 5. Continuously pull incoming messages from Kafka # Create a map: key is part_no, value is qty balance <---- container for the inventory movement counts of question #16 parts_transQtyBalance = {} # Initial inventory values - take the results of question #15, negate each part_no's value, and put it into parts_transQtyBalance as the initial value # parts_transQtyBalance['part_01'] = 0 # parts_transQtyBalance['part_02'] = 0 # parts_transQtyBalance['part_03'] = 0 # parts_transQtyBalance['part_04'] = 0 # parts_transQtyBalance['part_05'] = 0 # parts_transQtyBalance['part_06'] = 0 # parts_transQtyBalance['part_07'] = 0 # parts_transQtyBalance['part_08'] = 0
logger = logging.getLogger('consumer') logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() handler.setFormatter( logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s')) logger.addHandler(handler) # Create Consumer instance # Hint: try debug='fetch' to generate some log messages c = Consumer(conf, logger=logger) def print_assignment(consumer, partitions): print('Assignment:', partitions) # Subscribe to topics c.subscribe(['topico-demo-python'], on_assign=print_assignment) # Read messages from Kafka, print to stdout try: while True: msg = c.poll(timeout=1.0) if msg is None: continue if msg.error(): raise KafkaException(msg.error()) else: # Proper message sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' % (msg.topic(), msg.partition(), msg.offset(), str(msg.key()))) print(msg.value())
class KafkaConnector(object): """Simple wrapper class to configure a simple kafka consumer and producer pair, so that they can be used to perform simple filter() and map() operations over the received tweets""" def __init__( self, group_id=None, consumer_topic='consumer_limbo', producer_topic='consumer_limbo', logging_topic='minteressa_stats', bootstrap_servers='kafka:9092' ): self.group_id = group_id self.bootstrap_servers = bootstrap_servers self.consumer_topic = consumer_topic self.producer_topic = producer_topic self.logging_topic = logging_topic self.consumer = None self.producer = None def listen(self): while True: msg = self.consumer.poll() if msg is None: continue if msg.error(): # Error or event if msg.error().code() == KafkaError._PARTITION_EOF: # End of partition event sys.stderr.write( '%% %s [%d] reached end at offset %d\n' % ( msg.topic(), msg.partition(), msg.offset() ) ) elif msg.error(): # Error raise KafkaException(msg.error()) else: # Proper message sys.stdout.write( '%s [partition-%d] at offset %d with key %s:\n' % ( msg.topic(), msg.partition(), msg.offset(), str(msg.key()) ) ) yield msg def connect(self): self.consumer = Consumer({ 'bootstrap.servers': self.bootstrap_servers, 'group.id': self.group_id, 'default.topic.config': { 'auto.offset.reset': 'smallest' } }) print("subscribing to %s" % self.consumer_topic) self.consumer.subscribe([ self.consumer_topic ]) print("Subscribed to topic %s " % self.consumer_topic) self.producer = Producer({ 'bootstrap.servers': self.bootstrap_servers, 'group.id': self.group_id }) def send(self, message, producer_topic=None): producer_topic = producer_topic \ if producer_topic is not None \ else self.producer_topic self.producer.produce( producer_topic, message ) # self.producer.flush() def log(self, message, logging_topic=None): logging_topic = logging_topic \ if logging_topic is not None \ else self.logging_topic self.producer.produce(logging_topic, message) self.producer.flush() def close(self): self.consumer.close() self.producer.flush() # Producer has no close(); flush outstanding deliveries instead
def test_basic_api(): """ Basic API tests, these wont really do anything since there is no broker configured. """ try: kc = Consumer() except TypeError as e: assert str(e) == "expected configuration dict" def dummy_commit_cb(err, partitions): pass kc = Consumer({ 'group.id': 'test', 'socket.timeout.ms': '100', 'session.timeout.ms': 1000, # Avoid close() blocking too long 'on_commit': dummy_commit_cb }) kc.subscribe(["test"]) kc.unsubscribe() def dummy_assign_revoke(consumer, partitions): pass kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke) kc.unsubscribe() msg = kc.poll(timeout=0.001) if msg is None: print('OK: poll() timeout') elif msg.error(): print('OK: consumer error: %s' % msg.error().str()) else: print('OK: consumed message') if msg is not None: assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1) msglist = kc.consume(num_messages=10, timeout=0.001) assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist) with pytest.raises(ValueError) as ex: kc.consume(-100) assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value) with pytest.raises(ValueError) as ex: kc.consume(1000001) assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value) partitions = list( map(lambda part: TopicPartition("test", part), range(0, 100, 3))) kc.assign(partitions) # Verify assignment assignment = kc.assignment() assert partitions == assignment # Get cached watermarks, should all be invalid. lo, hi = kc.get_watermark_offsets(partitions[0], cached=True) assert lo == -1001 and hi == -1001 assert lo == OFFSET_INVALID and hi == OFFSET_INVALID # Query broker for watermarks, should raise an exception. try: lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False) except KafkaException as e: assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\ str(e.args([0])) kc.unassign() kc.commit(async=True) try: kc.commit(async=False) except KafkaException as e: assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET) # Get current position, should all be invalid. kc.position(partitions) assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions) try: kc.committed(partitions, timeout=0.001) except KafkaException as e: assert e.args[0].code() == KafkaError._TIMED_OUT kc.close()
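# Note on the commit calls above: `async` became a reserved keyword in Python 3.7,
# and confluent-kafka >= 1.0 renamed the commit parameter to `asynchronous`. A
# self-contained sketch of the equivalent calls on a current client version
# (configuration mirrors the test above):
from confluent_kafka import Consumer, KafkaError, KafkaException

kc2 = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                'session.timeout.ms': 1000})
kc2.commit(asynchronous=True)
try:
    kc2.commit(asynchronous=False)
except KafkaException as e:
    assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)
kc2.close()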
class KafkaConsumer: """Defines the base kafka consumer class""" def __init__( self, topic_name_pattern, message_handler, is_avro=True, offset_earliest=False, sleep_secs=1.0, consume_timeout=0.1, ): """Creates a consumer object for asynchronous use""" self.topic_name_pattern = topic_name_pattern self.message_handler = message_handler self.sleep_secs = sleep_secs self.consume_timeout = consume_timeout self.offset_earliest = offset_earliest self.broker_properties = { "bootstrap.servers": BROKER_URL, "group.id": GROUP_ID } # TODO - NEEDED? # if offset_earliest: # self.broker_properties['auto.offset.reset'] = 'earliest' if is_avro is True: self.broker_properties["schema.registry.url"] = SCHEMA_REGISTRY_URL self.consumer = AvroConsumer(self.broker_properties) else: self.consumer = Consumer(self.broker_properties) self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign) def on_assign(self, consumer, partitions): """Callback for when topic assignment takes place""" if self.offset_earliest: for partition in partitions: partition.offset = confluent_kafka.OFFSET_BEGINNING logger.info("partitions assigned for %s", self.topic_name_pattern) consumer.assign(partitions) async def consume(self): """Asynchronously consumes data from kafka topic""" while True: num_results = 1 while num_results > 0: num_results = self._consume() await gen.sleep(self.sleep_secs) def _consume(self): """Polls for a message. Returns 1 if a message was received, 0 otherwise""" try: message = self.consumer.poll(self.consume_timeout) if message is None or message.error(): return 0 else: self.message_handler(message) return 1 except Exception as e: logger.error(f"Failed to consume message: {e}") return 0 def close(self): """Cleans up any open kafka consumers""" self.consumer.close()
props = { 'bootstrap.servers': 'localhost:9092', # Where is the Kafka cluster? (replace with the Kafka cluster to connect to) 'group.id': 'tdea', # Name of the consumer group (replace with your student ID) 'auto.offset.reset': 'earliest', # Start reading from the earliest offset 'session.timeout.ms': 6000, 'error_cb': error_cb # Set the callback function that receives error messages } # Step 2. Create a Kafka Consumer instance consumer = Consumer(props) # Step 3. Specify the name of the topic to subscribe to topicName = "test2" # Step 4. Have the consumer subscribe to the specified topic on the Kafka cluster consumer.subscribe([topicName]) # Step 5. Continuously pull incoming messages from Kafka try: while True: # Ask Kafka to hand over any new messages record = consumer.poll(timeout=1.0) # retrieve messages one at a time # Check whether there is an error if record is None: continue if record.error(): # Error or event if record.error().code() == KafkaError._PARTITION_EOF: # End of partition event sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
class KafkaConsumer: def __init__(self, servers, topic, reset_offset=False, reset_type='start', consumer_group=None): self.bootstrap = servers self.consumer = None self.topic = topic self.reset_offset = reset_offset self.reset_type = reset_type self.consumer_group = consumer_group.lower().encode('utf-8') def activate(self): while True: try: conf = { 'bootstrap.servers': self.bootstrap, 'group.id': self.consumer_group, 'enable.auto.commit': False, 'auto.offset.reset': 'earliest' } self.consumer = Consumer(**conf) if self.reset_offset is True: if self.reset_type == 'start': tp = TopicPartition(self.topic, 0, OFFSET_BEGINNING) else: tp = TopicPartition(self.topic, 0, OFFSET_END) self.consumer.assign([tp]) else: self.consumer.subscribe([self.topic]) break except: time.sleep(30) logger.warning( 'Kafka Consumer {} Reconnected. Exception {}'.format( self.topic, sys.exc_info())) def consume(self): try: raw = self.consumer.consume(num_messages=1, timeout=0.3) return raw[0] if raw else None except: self.activate() raw = self.consumer.consume(num_messages=1, timeout=0.3) return raw[0] if raw else None def commit_offsets(self): try: self.consumer.commit() except: self.activate() def stop(self): self.consumer.close()
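# A hypothetical driver loop for the reconnecting KafkaConsumer wrapper above;
# broker, topic, and group names are placeholders:
consumer = KafkaConsumer(
    servers='localhost:9092',
    topic='example-topic',
    consumer_group='example-group')
consumer.activate()
try:
    while True:
        msg = consumer.consume()
        if msg is None or msg.error():
            continue
        print(msg.value())
        consumer.commit_offsets()   # manual commit, since enable.auto.commit is False
finally:
    consumer.stop()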
'bootstrap.servers': args.broker, 'group.id': "foo", 'auto.offset.reset': 'earliest', 'security.protocol': 'SSL', 'ssl.ca.location': args.cacert, 'ssl.certificate.location': args.cert, 'ssl.key.location': args.certkey } logging.basicConfig(stream=sys.stdout, filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO) logging.info(f"Starting Kafka Consumer ingesting from broker {conf['bootstrap.servers']} every {args.interval} seconds") consumer = Consumer(conf) logging.info(f'Subscribing to kafka topic {args.topic}') consumer.subscribe([args.topic]) try: logging.info("Open DB connection") conn = psycopg2.connect(args.dsn) cur = conn.cursor() logging.info("Create table for metrics if it doesn't exist") cur.execute("CREATE TABLE IF NOT EXISTS metrics (key UUID PRIMARY KEY, value JSONB)") while True: logging.info(f"Setting polling interval {args.interval} seconds to read from Kafka topic.") msg = consumer.poll(int(args.interval)) if msg is None: logging.info(f"Have not received any message within {args.interval} seconds. Retrying.") continue
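# The loop above is cut off before the consumed message is stored. A hypothetical
# continuation that writes one message into the metrics table, assuming the message
# key is a UUID string and the value is a JSON document (both assumptions, not
# stated in the original snippet):
import json

key = msg.key().decode('utf-8') if msg.key() else None
value = json.loads(msg.value().decode('utf-8'))
cur.execute(
    "INSERT INTO metrics (key, value) VALUES (%s, %s) "
    "ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value",
    (key, json.dumps(value)))
conn.commit()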
# Create Consumer instance # 'auto.offset.reset=earliest' to start reading from the beginning of the # topic if no committed offsets exist consumer = Consumer({ 'bootstrap.servers': conf['bootstrap.servers'], 'sasl.mechanisms': conf['sasl.mechanisms'], 'security.protocol': conf['security.protocol'], 'sasl.username': conf['sasl.username'], 'sasl.password': conf['sasl.password'], 'group.id': 'python_example_group_1', 'auto.offset.reset': 'earliest', }) # Subscribe to topic consumer.subscribe([topic]) working_data = {} # Process messages # total_count = 0 #f = open('consumer_output.json', 'w') breadcrumb_csv = open(BC_file, 'w') trip_csv = open(TP_file, 'w') failed_csv = open('failed_data.csv', 'w') breadcrumb_headers = [ 'tstamp', 'latitude', 'longitude', 'direction', 'speed',
from confluent_kafka import Consumer, KafkaError settings = { 'bootstrap.servers': 'localhost:9092', 'group.id': 'consumer_group', 'client.id': 'client_1', 'enable.auto.commit': True, 'session.timeout.ms': 6000, 'default.topic.config': {'auto.offset.reset': 'smallest'} } c = Consumer(settings) c.subscribe(topics=['test_topic']) try: while True: message = c.poll() if message is None: continue elif not message.error(): print('Message received: {}'.format(message.value())) elif message.error().code() == KafkaError._PARTITION_EOF: print('End of partition reached {}/{}'.format(message.topic(), message.partition())) else: print('Error occurred: {}'.format(message.error().str())) except KeyboardInterrupt:
from builtins import print from confluent_kafka import Consumer import schedule import xlsxwriter FILE_GENERATION_INTERVAL = 60 messages = {} print('Connecting...') consumer = Consumer({ 'bootstrap.servers': 'kafka-broker:9092', 'group.id': 'top-words', 'auto.offset.reset': 'earliest' }) consumer.subscribe(['tweets-wordcount']) print('Successfully subscribed to the topic!') def write_to_excel(): print('Writing to excel file') list_of_messages = [] for key, value in messages.items(): if key == '': continue list_of_messages.append({'text': key, 'count': value}) list_of_messages.sort(reverse=True, key=lambda e: e.get('count')) workbook = xlsxwriter.Workbook('./output/word-count.xlsx') worksheet = workbook.add_worksheet()
# Create logger for consumer (logs will be emitted when poll() is called) logger = logging.getLogger('consumer') logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s')) logger.addHandler(handler) # Create Consumer instance # Hint: try debug='fetch' to generate some log messages c = Consumer(conf, logger=logger) def print_assignment(consumer, partitions): print('Assignment:', partitions) # Subscribe to topics c.subscribe(topics, on_assign=print_assignment) # Read messages from Kafka, print to stdout try: while True: msg = c.poll(timeout=1.0) if msg is None: continue if msg.error(): raise KafkaException(msg.error()) else: # Proper message sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' % (msg.topic(), msg.partition(), msg.offset(), str(msg.key()))) print(msg.value())
def collect_image(topic: str, kafka_session: Consumer): """Collect an image from the respective image topic Arguments: broker {str} -- Kafka client topic {str} -- topic (ex. images) """ def print_assignment(consumer, partitions): print('Assignment:', partitions) kafka_session.subscribe(topic, on_assign=print_assignment) while True: msg = kafka_session.poll(timeout=1.0) if msg is None: continue logs.info("No messages available within topic : %s", topic) if msg.error(): if msg.error().code() == KafkaError._PARTITION_EOF: logs.info('%% %s [%d] reached end of offset %d' % (msg.topic(), msg.partition(), msg.offset())) else: logs.debug("Kafka Exception : %s", msg.error()) raise KafkaException(msg.error()) else: # Well formed messaged logs.info( '%% %s [%d] at offset %d with key %s: ' % (msg.topic(), msg.partition(), msg.offset(), str(msg.key()))) # image transform image_array, orig_image_array = image_transform(msg) prediction, class_weights, final_conv_layer = do_inference( ts_server="172.23.0.9", ts_port=8500, model_input=image_array) # create CAM get_output = K.function([tf.convert_to_tensor(image_array)], [ tf.convert_to_tensor(final_conv_layer), tf.convert_to_tensor(prediction) ]) [conv_outputs, predictions] = get_output([image_array[0]]) conv_outputs = conv_outputs[0, :, :, :] # TODO: Receiving variable results across CAMs generated by this # method. Needs further investigation and comparison to original # CAM paper found here : http://cnnlocalization.csail.mit.edu/ cam = np.zeros(dtype=np.float32, shape=(conv_outputs.shape[:2])) for i, w in enumerate(class_weights[0]): cam += w * conv_outputs[:, :, i] cam = cam - np.min(cam) cam /= np.max(cam) #h,w = orig_image_array.shape[:2] cam = cv2.resize(cam, orig_image_array.shape[:2]) # TODO : Investigate why the cv2.resize() function transposes # the height and width of the orig_image_array #cam = cv2.resize(cam, (orig_image_array.shape[:2][1], orig_image_array.shape[:2][0]), interpolation=cv2.INTER_CUBIC) cam = np.uint8(255 * cam) heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET) #heatmap[np.where(cam < 0.2)] = 0 img = heatmap * 0.3 + orig_image_array logs.info("Class Activation Map (CAM) Created!") # This is complete hackery and will need to be replaced # I don't know why a numpy array (see `img` array above) # would be 25MB when all constituent arrays are ~ 7MB total. # Let alone when saving an image to disk the image is only 1MB total. cv2.imwrite("inflight_img.png", img) new_img = Image.open("inflight_img.png", mode='r') img_bytes = io.BytesIO() new_img.save(img_bytes, format='PNG') img_bytes = img_bytes.getvalue() message = marshall_message(img_bytes, prediction.tolist()) os.remove("inflight_img.png") p = kafka_producer() p.poll(0) p.produce(results_kafka_topic, value=message, callback=kafka_delivery_report) p.flush()
def run_commit_log_consumer(bootstrap_servers, consumer_group, commit_log_topic, partition_state_manager, synchronize_commit_group, start_event, stop_request_event): start_event.set() logging.debug('Starting commit log consumer...') positions = {} # NOTE: The commit log consumer group should not be persisted into the # ``__consumer_offsets`` topic since no offsets are committed by this # consumer. The group membership metadata messages will be published # initially but as long as this group remains a single consumer it will # be deleted after the consumer is closed. # It is very important to note that the ``group.id`` **MUST** be unique to # this consumer process!!! This ensures that it is able to consume from all # partitions of the commit log topic and get a comprehensive view of the # state of the consumer groups it is tracking. consumer = Consumer({ 'bootstrap.servers': bootstrap_servers, 'group.id': consumer_group, 'enable.auto.commit': 'false', 'enable.auto.offset.store': 'true', 'enable.partition.eof': 'false', 'default.topic.config': { 'auto.offset.reset': 'error', }, }) def rewind_partitions_on_assignment(consumer, assignment): # The commit log consumer must start consuming from the beginning of # the commit log topic to ensure that it has a comprehensive view of # all active partitions. consumer.assign([ TopicPartition( i.topic, i.partition, positions.get((i.topic, i.partition), OFFSET_BEGINNING), ) for i in assignment ]) consumer.subscribe( [commit_log_topic], on_assign=rewind_partitions_on_assignment, ) while not stop_request_event.is_set(): message = consumer.poll(1) if message is None: continue error = message.error() if error is not None: raise Exception(error) positions[(message.topic(), message.partition())] = message.offset() + 1 group, topic, partition, offset = get_commit_data(message) if group != synchronize_commit_group: logger.debug('Received consumer offsets update from %r, ignoring...', group) continue if offset in LOGICAL_OFFSETS: logger.debug( 'Skipping invalid logical offset (%r) from %s/%s...', offset, topic, partition) continue elif offset < 0: logger.warning( 'Received unexpected negative offset (%r) from %s/%s!', offset, topic, partition) partition_state_manager.set_remote_offset(topic, partition, offset)
class SynchronizedConsumer(object): """ This class implements the framework for a consumer that is intended to only consume messages that have already been consumed and committed by members of another consumer group. This works similarly to the Kafka built-in ``__consumer_offsets`` topic. The consumer group that is being "followed" (the one that must make progress for our consumer here to make progress, identified by the ``synchronize_commit_group`` constructor parameter/instance attribute) must report its offsets to a topic (identified by the ``commit_log_topic`` constructor parameter/instance attribute). This consumer subscribes to both commit log topic, as well as the topic(s) that we are actually interested in consuming messages from. The messages received from the commit log topic control whether or not consumption from partitions belonging to the main topic is paused, resumed, or allowed to continue in its current state without changes. The furthest point in any partition that this consumer should ever consume to is the maximum offset that has been recorded to the commit log topic for that partition. If the offsets recorded to that topic move non-monotonically (due to an intentional offset rollback, for instance) this consumer *may* consume up to the highest watermark point. (The implementation here tries to pause consuming from the partition as soon as possible, but this makes no explicit guarantees about that behavior.) """ initial_offset_reset_strategies = { 'earliest': get_earliest_offset, 'latest': get_latest_offset, } def __init__(self, bootstrap_servers, consumer_group, commit_log_topic, synchronize_commit_group, initial_offset_reset='latest', on_commit=None): self.bootstrap_servers = bootstrap_servers self.consumer_group = consumer_group self.commit_log_topic = commit_log_topic self.synchronize_commit_group = synchronize_commit_group self.initial_offset_reset = self.initial_offset_reset_strategies[initial_offset_reset] self.__partition_state_manager = SynchronizedPartitionStateManager( self.__on_partition_state_change) self.__commit_log_consumer, self.__commit_log_consumer_stop_request = self.__start_commit_log_consumer() self.__positions = {} def commit_callback(error, partitions): if on_commit is not None: return on_commit(error, partitions) consumer_configuration = { 'bootstrap.servers': self.bootstrap_servers, 'group.id': self.consumer_group, 'enable.auto.commit': 'false', 'enable.auto.offset.store': 'true', 'enable.partition.eof': 'false', 'default.topic.config': { 'auto.offset.reset': 'error', }, 'on_commit': commit_callback, } self.__consumer = Consumer(consumer_configuration) def __start_commit_log_consumer(self, timeout=None): """ Starts running the commit log consumer. 
""" stop_request_event = threading.Event() start_event = threading.Event() result = execute( functools.partial( run_commit_log_consumer, bootstrap_servers=self.bootstrap_servers, consumer_group='{}:sync:{}'.format(self.consumer_group, uuid.uuid1().hex), commit_log_topic=self.commit_log_topic, synchronize_commit_group=self.synchronize_commit_group, partition_state_manager=self.__partition_state_manager, start_event=start_event, stop_request_event=stop_request_event, ), ) start_event.wait(timeout) return result, stop_request_event def __check_commit_log_consumer_running(self): if not self.__commit_log_consumer.running(): try: result = self.__commit_log_consumer.result(timeout=0) # noqa except TimeoutError: pass # not helpful raise Exception('Commit log consumer unexpectedly exit!') def __on_partition_state_change( self, topic, partition, previous_state_and_offsets, current_state_and_offsets): """ Callback that is invoked when a partition state changes. """ logger.debug('State change for %r: %r to %r', (topic, partition), previous_state_and_offsets, current_state_and_offsets) current_state, current_offsets = current_state_and_offsets if current_offsets.local is None: # It only makes sense to manipulate the consumer if we've got an # assignment. (This block should only be entered at startup if the # remote offsets are retrieved from the commit log before the local # consumer has received its assignment.) return # TODO: This will be called from the commit log consumer thread, so need # to verify that calling the ``consumer.{pause,resume}`` methods is # thread safe! if current_state in (SynchronizedPartitionState.UNKNOWN, SynchronizedPartitionState.SYNCHRONIZED, SynchronizedPartitionState.REMOTE_BEHIND): self.__consumer.pause([TopicPartition(topic, partition, current_offsets.local)]) elif current_state is SynchronizedPartitionState.LOCAL_BEHIND: self.__consumer.resume([TopicPartition(topic, partition, current_offsets.local)]) else: raise NotImplementedError('Unexpected partition state: %s' % (current_state,)) def subscribe(self, topics, on_assign=None, on_revoke=None): """ Subscribe to a topic. """ self.__check_commit_log_consumer_running() def assignment_callback(consumer, assignment): # Since ``auto.offset.reset`` is set to ``error`` to force human # interaction on an offset reset, we have to explicitly specify the # starting offset if no offset has been committed for this topic during # the ``__consumer_offsets`` topic retention period. assignment = { (i.topic, i.partition): self.__positions.get((i.topic, i.partition)) for i in assignment } for i in self.__consumer.committed([TopicPartition(topic, partition) for ( topic, partition), offset in assignment.items() if offset is None]): k = (i.topic, i.partition) if i.offset > -1: assignment[k] = i.offset else: assignment[k] = self.initial_offset_reset(consumer, i.topic, i.partition) self.__consumer.assign([TopicPartition(topic, partition, offset) for (topic, partition), offset in assignment.items()]) for (topic, partition), offset in assignment.items(): # Setting the local offsets will either cause the partition to be # paused (if the remote offset is unknown or the local offset is # not trailing the remote offset) or resumed. 
self.__partition_state_manager.set_local_offset(topic, partition, offset) self.__positions[(topic, partition)] = offset if on_assign is not None: on_assign(self, [TopicPartition(topic, partition) for topic, partition in assignment.keys()]) def revocation_callback(consumer, assignment): for item in assignment: # TODO: This should probably also be removed from the state manager. self.__positions.pop((item.topic, item.partition)) if on_revoke is not None: on_revoke(self, assignment) self.__consumer.subscribe( topics, on_assign=assignment_callback, on_revoke=revocation_callback) def poll(self, timeout): self.__check_commit_log_consumer_running() message = self.__consumer.poll(timeout) if message is None: return if message.error() is not None: return message self.__partition_state_manager.validate_local_message( message.topic(), message.partition(), message.offset()) self.__partition_state_manager.set_local_offset( message.topic(), message.partition(), message.offset() + 1) self.__positions[(message.topic(), message.partition())] = message.offset() + 1 return message def commit(self, *args, **kwargs): self.__check_commit_log_consumer_running() return self.__consumer.commit(*args, **kwargs) def close(self): self.__check_commit_log_consumer_running() self.__commit_log_consumer_stop_request.set() try: self.__consumer.close() finally: self.__commit_log_consumer.result()
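# A hypothetical polling loop for the SynchronizedConsumer above; broker, group,
# commit-log topic, and subscribed topic names are placeholders:
consumer = SynchronizedConsumer(
    bootstrap_servers='localhost:9092',
    consumer_group='follower-group',
    commit_log_topic='example-commit-log',
    synchronize_commit_group='leader-group',
    initial_offset_reset='earliest')
consumer.subscribe(['example-topic'])
try:
    while True:
        message = consumer.poll(1.0)
        if message is None or message.error() is not None:
            continue
        print(message.value())
        consumer.commit(asynchronous=False)
finally:
    consumer.close()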
def httpry_logs(): consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Httpry_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}}) consumer.subscribe(['httpry_logs']) try: while True: msg = consumer.poll() if msg: if not msg.error(): Msg = msg.value().decode('utf-8').strip() try: tm = time.strftime('%Y%m%d%H%M', time.localtime()) httpry_Key = 'httpry_domain.%s' % tm if Msg: msg = Msg.split() if len(msg) == 11: if msg[6] != '-': RC.zincrby(httpry_Key,msg[6], 1) RC.expire(httpry_Key,600) except Exception as e: logging.error(e) continue elif msg.error().code() != KafkaError._PARTITION_EOF: logging.error(msg.error()) continue except Exception as e: logging.error(e) finally: consumer.close()
def analytics_internet2_logs(): consumer = Consumer({'bootstrap.servers': kafka_hosts, 'group.id': 'Internet2_logs_%s' %dt,'default.topic.config': {'auto.offset.reset': 'latest','auto.commit.enable':'true'}}) consumer.subscribe(['haproxy_logs']) try: while True: msg = consumer.poll() if not msg.error(): Msg = msg.value().decode('utf-8').strip() try: tt = time.strftime('%Y%m%d', time.localtime()) tm = time.strftime('%Y%m%d%H%M', time.localtime()) Tm = time.strftime('%H:%M', time.localtime()) Tra_ser_minute_Key = 'traffic.ser.%s' % tm Tra_cli_minute_Key = 'traffic.cli.%s' % tm if Msg: Msg = Msg.split() if len(Msg) >= 17: traffic_cli = Msg[10] traffic_ser = Msg[11] Topic = str(Msg[14]).split('|')[0].replace('{', '').strip() IP = str(Msg[5]) Rtime = Msg[8].split('/')[-1] if Rtime.isdigit(): Rtime = int(Rtime) else: Rtime = 0 uv_key = 'baihe_uv_%s' % tt Rt_Key = 'Rtime_%s_%s' % (tt, Topic) PATH = str(Msg[16]).split('?')[0] URL = 'http://%s%s' % (Topic,PATH) Tra_ser_url_minute_Key = 'traffic.ser.url_%s' % Tm Tra_cli_url_minute_Key = 'traffic.cli.url_%s' % Tm for KEY in (uv_key,Rt_Key,Tra_ser_url_minute_Key,Tra_cli_url_minute_Key): RC.expire(KEY,3600) # Traffic if traffic_ser.isdigit() and traffic_cli.isdigit(): RC.zincrby(Tra_cli_url_minute_Key, URL, int(traffic_cli)) RC.zincrby(Tra_ser_url_minute_Key,URL, int(traffic_ser)) # Real-time traffic RC.zincrby(Tra_cli_minute_Key, Topic, int(traffic_cli)) RC.expire(Tra_cli_minute_Key, 300) RC.zincrby(Tra_ser_minute_Key, Topic, int(traffic_ser)) RC.expire(Tra_ser_minute_Key, 300) # if Rtime: RC.lpush(Rt_Key, Rtime) RC.sadd(uv_key, IP) except Exception as e: logging.error(e) continue elif msg.error().code() != KafkaError._PARTITION_EOF: logging.error(msg.error()) continue except Exception as e: logging.error(e) finally: consumer.close()