Ejemplo n.º 1
0
def flatMap1(kv):
    """Flatten one sensor record into three (index, statistic) KeyValuePairs.

    Produces [(0, mean), (1, std dev), (2, log of sum)] over the five
    sensor readings in kv.val. If the value is malformed (missing a
    sensor key, or not subscriptable), sentinel pairs with value -1
    are returned instead so the stream keeps flowing.
    """
    # Sentinel output used when the record cannot be parsed.
    flatlist = [KeyValuePair(0, -1), KeyValuePair(1, -1), KeyValuePair(2, -1)]
    val = kv.val
    try:
        # Only the dict lookups can legitimately fail here; keep the try
        # body minimal instead of a bare `except` that hides real bugs.
        sensorVals = [val['sensor1'], val['sensor2'], val['sensor3'],
                      val['sensor4'], val['sensor5']]
    except (KeyError, TypeError, IndexError):
        # Malformed record: fall back to the sentinel list.
        return flatlist
    return [KeyValuePair(0, np.average(sensorVals)),
            KeyValuePair(1, np.std(sensorVals)),
            KeyValuePair(2, np.log(np.sum(sensorVals)))]
Ejemplo n.º 2
0
 def flatMapValues(self, func):
     '''
     func is lambda or method that changes values to list of values to be flattened
     '''
     # Expand each record: func turns kv.val into an iterable, and every
     # element of that iterable becomes its own KeyValuePair carrying the
     # original key.
     def expand(kv):
         for item in func(kv.val):
             yield KeyValuePair(kv.key, item)

     self.stream = chain.from_iterable(map(expand, self.stream))
     return self
Ejemplo n.º 3
0
 def kafkaconsumer(self):
     '''
     Consume records from the subscribed topic forever, wrapping each one
     as a KeyValuePair and handing it to the builder queue.
     '''
     print("Kafka Consumer START... topic:", self.topic)
     self.consumer.subscribe(self.topic)
     for record in self.consumer:
         # Deserialize key and value with the configured serdes.
         pair = KeyValuePair(serde(record.key, self.keyserde),
                             serde(record.value, self.valserde))
         # Carry the Kafka metadata plus an ingest timestamp on the pair.
         pair.partition = record.partition
         pair.offset = record.offset
         pair.time = time.time()
         # Enqueue without blocking the consumer loop.
         self.builder.put_nowait(pair)
Ejemplo n.º 4
0
 def mapValues(self, func):
     '''
     func is lambda or method that changes values 
     '''
     # Lazily rebuild every record with its key intact and func applied
     # to the value; returns self so calls can be chained.
     self.stream = map(
         lambda kv: KeyValuePair(kv.key, func(kv.val)),
         self.stream)
     return self
Ejemplo n.º 5
0
 def mapValues(self, func):
     '''
     func is lambda or method that changes values 
     '''
     # Derive the new backing-store name and register a KStore for it.
     mapped_name = self.materialized + '_mapped'
     self.kstores[mapped_name] = KStore(mapped_name)
     # Re-materialize the table from the value-transformed stream.
     transform = lambda kv: KeyValuePair(kv.key, func(kv.val))
     self.stream = self.table(map(transform, self.stream),
                              self.kstores[mapped_name])
     self.materialized = mapped_name
     return self
Ejemplo n.º 6
0
def make_clicks():
    """Send a randomly chosen name to the Kafka topic forever, with a
    random pause between sends."""
    producer = KafkaProducer(bootstrap_servers=config['bootstrap.servers'])
    print('make_clicks')
    while True:
        chosen = names[randint(0, 6)]
        producer.send(topics, key='0'.encode('utf-8'), value=chosen.encode('utf-8'))
        # Pause a random multiple of 0.5s, i.e. between 1.0 and 7.5 seconds.
        time.sleep(randint(2, 15) * 0.5)


if __name__ == '__main__':
    # Launch the synthetic click generator in a separate process.
    click_proc = Process(target=make_clicks)
    click_proc.start()

    # Builder wires a Kafka consumer onto the input topic; the KStream
    # consumes from it.
    builder = KStreamBuilder(topics, config, keyserde='str', valserde='str')
    views = KStream(builder.builder)

    # Re-key every record by its value, preserving the record timestamp.
    views.map(lambda kv: KeyValuePair(kv.val, kv.val, time=kv.time))

    # Count per key over a 60-second window, then keep keys seen 3+ times.
    anomalous_users = (views
                       .count(materialized='usercounts', kwindow=KWindow(60))
                       .filter(lambda kv: kv.val >= 3))

    # Table back to stream, peek for debugging, publish to Kafka.
    anomalous_users.toStream().peeker(peek).to(
        'anomalous', config, keyserde='str', valserde='str')

    # Note that KStream can either be initialized with a KStreambuilder or a
    # stream (generator) with a Kafka Producer
    
Ejemplo n.º 7
0
Created on Feb 22, 2019

@author: Barnwaldo
'''
import numpy as np
from core.kbuilder import KStreamBuilder, KStream
from core.kutils import KeyValuePair

                             
# Kafka client configuration: broker address, consumer group id, session timeout.
config = {'bootstrap.servers': 'localhost:9092', 'group.id': 'barnwaldo', 'session.timeout.ms': 6000}
# config = {'bootstrap.servers': '192.168.21.3:9092', 'group.id': 'barnwaldo', 'session.timeout.ms': 6000}
# Input topic name.
topics = 'endpt00'
                        

       
# Re-key each record (key / 10) and replace the value with the mean of the
# five sensor readings; the stop-message sentinel passes through untouched.
func1 = (lambda kv: KeyValuePair(kv.key / 10, kv.val if kv.val == ">>> stop message <<<" \
          else (kv.val['sensor1'] + kv.val['sensor2'] + kv.val['sensor3'] + kv.val['sensor4'] + kv.val['sensor5']) /5))

# Same value transform as func1, but the key is left unchanged.
func2 = (lambda kv: KeyValuePair(kv.key, kv.val if kv.val == ">>> stop message <<<" \
          else (kv.val['sensor1'] + kv.val['sensor2'] + kv.val['sensor3'] + kv.val['sensor4'] + kv.val['sensor5']) /5))


# Predicates for routing records by key range; pred3 accepts every record.
pred1 = (lambda kv: kv.key < 4)
pred2 = (lambda kv: kv.key < 7)
pred3 = (lambda kv: True)

# Ordered list of branch predicates.
predicates = [pred1, pred2, pred3]

def flatMapValues1(val):
    flatlist = [-1, -1, -1]
    try:
        sensorVals = [val['sensor1'], val['sensor2'], val['sensor3'], val['sensor4'], val['sensor5']]
Ejemplo n.º 8
0
Word count Kafka Streams example - migrated from Java to Python pytreams
'''

from core.kbuilder import KStreamBuilder, KStream
from core.kutils import KeyValuePair

# config info for Kafka broker
config = {
    'bootstrap.servers': '192.168.21.3:9092',  # Kafka broker address
    'group.id': 'barnwaldo',                   # consumer group id
    'session.timeout.ms': 6000                 # consumer session timeout (ms)
}
# Input topic carrying raw text lines.
topics = 'word'

if __name__ == '__main__':

    # Builder attaches a Kafka consumer to the input topic; the KStream
    # consumes from it.
    builder = KStreamBuilder(topics, config, keyserde='str', valserde='str')
    textlines = KStream(builder.builder)

    # Split each line into lowercase words, then group the flattened
    # records by word (fluent API: flatMapValues returns the stream).
    tokenized = textlines.flatMapValues(lambda text: text.lower().split())
    tokenized.groupBy(lambda kv: KeyValuePair(kv.val, kv.val))

    # Materialize a per-word count table named 'wordcounter'.
    wordcounts = textlines.count(materialized='wordcounter')

    # Turn the table back into a stream and publish it to Kafka.
    wordcounts.toStream().to('counts', config, keyserde='str', valserde='str')

    # Note that KStream can either be initialized with a KStreambuilder or a
    # stream (generator) with a Kafka Producer