Ejemplo n.º 1
0
async def f():
    """Consume the 'test' Kafka topic through streamz and print throughput.

    Each message from the Kafka source is passed through ``combinedMap`` and
    handed to a sink that stores it under ``'raw'`` in a local results dict,
    together with the sink's creation time and the time elapsed up to the most
    recent item (both taken from ``time.time_ns()``).

    The coroutine checks once per second until 200 items have been collected,
    then prints the results dict, the mean of ``timeDiffs`` scaled by 10**-9,
    and the reciprocal of that mean. ``combinedMap`` and ``timeDiffs`` are
    expected to be defined outside this function.
    """
    broker_settings = {'bootstrap.servers': 'localhost:9092', 'group.id': 'streamz-minas'}
    topic_name = 'test'
    kafka_stream = Stream.from_kafka(
        [topic_name], broker_settings, start=True, asynchronous=True)

    collected = {}

    def make_sink(label):
        """Register bookkeeping entries for *label* and return its sink callback."""
        started_key = label + 'timestamp'
        elapsed_key = label + 'elapsed'
        collected[label] = []
        collected[started_key] = time.time_ns()
        # Placeholder until the first item arrives and overwrites it.
        collected[elapsed_key] = time.time_ns()

        def record(item):
            collected[label].append(item)
            collected[elapsed_key] = time.time_ns() - collected[started_key]
            print(label, item)

        return record

    print('kafkaSource map')
    mapped = kafka_stream.map(combinedMap)
    mapped.sink(make_sink('raw'))

    kafka_stream.visualize()
    while len(collected['raw']) < 200:
        print('len(results[raw])', len(collected['raw']))
        await gen.sleep(1)
    print(collected)
    mean_diff = sum(timeDiffs) / len(timeDiffs)
    avg = mean_diff * 10**(-9)
    print(avg, 'avgs', 1/avg, 'item/s')
Ejemplo n.º 2
0
async def minas_consumer_streamz():
    """Classify examples read from Kafka and publish each result to Kafka.

    JSON messages are read from ``topic`` through a streamz Kafka source,
    mapped through ``combinedMap`` and handed to a sink that records the
    value and produces a JSON record to ``topic + '_out'``. After wiring the
    pipelines the coroutine sleeps in an endless loop, so it only ends when
    it is cancelled.

    ``topic``, ``kafkaConfig``, ``clusters``, ``minDist`` and ``Example`` are
    expected to be defined outside this function.
    """
    kafkaSource = Stream.from_kafka([topic],
                                    consumer_params=kafkaConfig,
                                    start=True,
                                    asynchronous=False).buffer(5)
    kprod = confluent_kafka.Producer(kafkaConfig)

    def combinedMap(jsonItem):
        """Decode one message; return ``(minDist result, age in ns)`` or None."""
        di = json.loads(jsonItem)
        if 'example' not in di:
            # Messages without an 'example' payload carry nothing to classify.
            return None
        example = Example(**di['example'])
        timeDiff = time.time_ns() - example.timestamp
        return (minDist(clusters, example.item), timeDiff)

    results = {}

    def asSink(name):
        """Initialise the bookkeeping entries for *name* and return its sink."""
        results[name] = []
        results[name + 'timestamp'] = time.time_ns()
        results[name + 'elapsed'] = time.time_ns()

        def f(val):
            # combinedMap yields None for messages it cannot classify; indexing
            # into None below would raise, so those are skipped entirely.
            if val is None:
                return
            results[name].append(val)
            results[name +
                    'elapsed'] = time.time_ns() - results[name + 'timestamp']
            # presumably val[0] is a (distance, cluster) pair from minDist --
            # TODO confirm against minDist's definition.
            value = json.dumps({'d': val[0][0], 'label': val[0][1].label})
            kprod.produce(topic=topic + '_out', value=value)
            print(name, val)

        return f

    print('kafkaSource map')
    # NOTE(review): both pipelines below stay attached to kafkaSource and share
    # the 'raw' key, so every message is processed and published twice --
    # confirm whether only one of them is intended.
    outStream = kafkaSource.map(combinedMap).sink(asSink('raw'))
    # The method that brings results back from Dask is `gather`; the previous
    # spelling `gatter` raised AttributeError before the loop was reached.
    outStream = kafkaSource.scatter().map(combinedMap).gather().sink(
        asSink('raw'))

    while True:
        await gen.sleep(1000)
Ejemplo n.º 3
0
import json
from dask.distributed import Client
from time import sleep
import random
from time import time

# Dask: connect to the remote scheduler.
client = Client('35.180.242.51:8786')
import os
# Client.run executes the callable on the workers; here it shells out to pip
# so that cassandra-driver gets installed on them.
client.run(lambda: os.system("pip install cassandra-driver"))
print('befor upload')

# streamz: Kafka source for the 'supramoteur' topic, sharing the Dask
# client's event loop.
# NOTE(review): `Stream` is not imported in the visible part of this file --
# confirm it is imported elsewhere.
source = Stream.from_kafka(['supramoteur'], {
    'bootstrap.servers': '35.180.242.51:9092',
    'group.id': 'mygroup1'
},
                           loop=client.loop)


def time_final(entry):
    """Return the current ``time()`` minus the value stored in ``entry['time']``.

    The result is in seconds, assuming ``entry['time']`` was itself taken
    from ``time()``.
    """
    now = time()
    return now - entry['time']


def ecrire(entry):
    entry['time_before'] = time()
    with open('data.txt', "a+") as f:
        f.write("%s\r\n" % entry)
        insert_to(entry)
    entry['time_after'] = round(-entry['time'], 4)