async def f():
    """Consume 200 items from a local Kafka topic through a streamz pipeline
    and report the average per-item latency.

    NOTE(review): `combinedMap`, `Stream`, `gen` and `time` are not defined in
    this scope — they must be provided by the enclosing module.
    """
    # Earlier experiment kept for reference:
    # source = Stream(asynchronous=True)
    # source.rate_limit(0.500).sink(write)
    # for x in range(10):
    #     await source.emit(x)
    kafkaConfig = {'bootstrap.servers': 'localhost:9092',
                   'group.id': 'streamz-minas'}
    topic = 'test'
    kafkaSource = Stream.from_kafka([topic], kafkaConfig, start=True,
                                    asynchronous=True)

    results = {}

    def asSink(name):
        # Per-name bookkeeping: collected values plus start/elapsed timestamps.
        results[name] = []
        results[name + 'timestamp'] = time.time_ns()
        results[name + 'elapsed'] = time.time_ns()

        # Renamed from `f` — the original shadowed the enclosing coroutine.
        def sink(val):
            results[name].append(val)
            results[name + 'elapsed'] = time.time_ns() - results[name + 'timestamp']
            print(name, val)
        return sink

    print('kafkaSource map')
    kafkaSource.map(combinedMap).sink(asSink('raw'))
    kafkaSource.visualize()

    # Poll until 200 items have been collected by the sink.
    while len(results['raw']) < 200:
        print('len(results[raw])', len(results['raw']))
        await gen.sleep(1)
    print(results)

    # BUG FIX: `timeDiffs` was never defined (NameError on this line in the
    # original). `combinedMap` is expected to emit (value, latency-ns) tuples
    # — TODO confirm against its definition — so the per-item latencies are
    # the second element of each collected item.
    timeDiffs = [item[1] for item in results['raw'] if item is not None]
    avg = (sum(timeDiffs) / len(timeDiffs)) * 10**(-9)  # ns -> seconds
    print(avg, 'avgs', 1/avg, 'item/s')
async def minas_consumer_streamz():
    """Consume examples from Kafka, classify each against `clusters`, and
    publish the (distance, label) result to the `<topic>_out` topic.

    NOTE(review): `topic`, `kafkaConfig`, `clusters`, `Example`, `minDist`,
    `Stream`, `confluent_kafka` and `gen` must be supplied by the enclosing
    module.
    """
    kafkaSource = Stream.from_kafka([topic], consumer_params=kafkaConfig,
                                    start=True, asynchronous=False).buffer(5)
    kprod = confluent_kafka.Producer(kafkaConfig)

    def combinedMap(jsonItem):
        # Returns (minDist-result, latency-ns) for example messages,
        # None for anything else.
        di = json.loads(jsonItem)
        if 'example' not in di:
            return None
        example = Example(**di['example'])
        timeDiff = time.time_ns() - example.timestamp
        return (minDist(clusters, example.item), timeDiff)

    results = {}

    def asSink(name):
        # Per-name bookkeeping: collected values plus start/elapsed timestamps.
        results[name] = []
        results[name + 'timestamp'] = time.time_ns()
        results[name + 'elapsed'] = time.time_ns()

        def sink(val):
            # Messages without an 'example' payload map to None; skip them
            # instead of crashing on val[0][0].
            if val is None:
                return
            results[name].append(val)
            results[name + 'elapsed'] = time.time_ns() - results[name + 'timestamp']
            value = json.dumps({'d': val[0][0], 'label': val[0][1].label})
            kprod.produce(topic=topic + '_out', value=value)
            print(name, val)
        return sink

    print('kafkaSource map')
    # BUG FIX: the original attached TWO pipelines to the same source — the
    # plain `.map(...).sink(...)` chain plus the scattered one — processing
    # every message twice and resetting the 'raw' accumulators via the second
    # asSink('raw') call. It also used `.gatter()`, which does not exist in
    # streamz; the method is `.gather()`.
    outStream = kafkaSource.scatter().map(combinedMap).gather().sink(
        asSink('raw'))

    # The stream runs via callbacks; just keep the coroutine alive.
    while True:
        await gen.sleep(1000)
# Script setup: connect to a remote dask cluster, attach a Kafka source, and
# define timing/persistence helpers for incoming entries.
import json
from dask.distributed import Client
from time import sleep
import random
from time import time

#dask
# Hard-coded remote scheduler; the pip install is pushed to every worker so
# the cassandra driver is importable there before tasks run.
client = Client('35.180.242.51:8786')
import os
client.run(lambda: os.system("pip install cassandra-driver"))
print('befor upload')

#streamz
# NOTE(review): `Stream` is not imported in this chunk — presumably
# `from streamz import Stream` appears elsewhere in the file; verify.
source = Stream.from_kafka(['supramoteur'], {
    'bootstrap.servers': '35.180.242.51:9092',
    'group.id': 'mygroup1'
}, loop=client.loop)


def time_final(entry):
    # Elapsed wall-clock seconds since the entry was stamped with 'time'.
    tt = time() - entry['time']
    return tt


def ecrire(entry):
    # Append the entry to a local log file, persist it via `insert_to`
    # (defined elsewhere in the project), and stamp before/after times.
    entry['time_before'] = time()
    with open('data.txt', "a+") as f:
        f.write("%s\r\n" % entry)
    insert_to(entry)
    # NOTE(review): `-entry['time']` looks suspicious — probably meant
    # `time() - entry['time']` like time_final above; confirm with the author.
    entry['time_after'] = round(-entry['time'], 4)