Example #1
    def _startContext(self, topic):
        # Start the StreamingContext and also collect the result
        stream = MQTTUtils.createStream(self.ssc, "tcp://" + self._MQTTTestUtils.brokerUri(), topic)
        result = []

        def getOutput(_, rdd):
            for data in rdd.collect():
                result.append(data)

        stream.foreachRDD(getOutput)
        self.ssc.start()
        return result
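
A minimal sketch of how a helper like this is typically exercised from a test, assuming it lives on the same unittest.TestCase subclass as _startContext and that the broker-side test utility exposes a publishData(topic, data) method (both assumptions, not shown in the snippet):

import time  # module-level import, alongside the test class

def _test_basic_receive(self):
    # Hedged sketch: publish until the collected list fills up or we time out.
    # publishData() and the 30-second deadline are assumptions.
    topic = "test-topic"
    data = "hello world"
    result = self._startContext(topic)

    deadline = time.time() + 30
    while time.time() < deadline and not result:
        self._MQTTTestUtils.publishData(topic, data)  # assumed test helper
        time.sleep(0.5)

    self.assertIn(data, result)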
Example #3
    `$ bin/spark-submit --jars external/mqtt-assembly/target/scala-*/\
      spark-streaming-mqtt-assembly-*.jar examples/src/main/python/streaming/mqtt_wordcount.py \
      tcp://localhost:1883 foo`
"""

import sys

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print >> sys.stderr, "Usage: mqtt_wordcount.py <broker url> <topic>"
        exit(-1)

    sc = SparkContext(appName="PythonStreamingMQTTWordCount")
    ssc = StreamingContext(sc, 1)

    brokerUrl = sys.argv[1]
    topic = sys.argv[2]

    lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
    counts = lines.flatMap(lambda line: line.split(" ")) \
        .map(lambda word: (word, 1)) \
        .reduceByKey(lambda a, b: a+b)
    counts.pprint()

    ssc.start()
    ssc.awaitTermination()
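
The word-count job above only prints something once messages arrive on the topic. A small companion publisher, sketched here with the paho-mqtt client (an assumption; any MQTT client works), sends a few lines to the same broker and topic used in the spark-submit command:

import time

import paho.mqtt.publish as publish

BROKER_HOST = "localhost"  # matches tcp://localhost:1883 in the example
TOPIC = "foo"

lines_to_send = [
    "hello spark streaming",
    "hello mqtt",
    "spark streaming word count",
]

for line in lines_to_send:
    publish.single(TOPIC, payload=line, hostname=BROKER_HOST, port=1883)
    time.sleep(1)  # spread the messages over a couple of 1-second batches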
Example #4
      external/mqtt-assembly/target/scala-*/spark-streaming-mqtt-assembly-*.jar \
      examples/src/main/python/streaming/mqtt_wordcount.py \
      tcp://localhost:1883 foo`
"""

import sys

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print >> sys.stderr, "Usage: mqtt_wordcount.py <broker url> <topic>"
        exit(-1)

    sc = SparkContext(appName="PythonStreamingMQTTWordCount")
    ssc = StreamingContext(sc, 1)

    brokerUrl = sys.argv[1]
    topic = sys.argv[2]

    lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
    counts = lines.flatMap(lambda line: line.split(" ")) \
        .map(lambda word: (word, 1)) \
        .reduceByKey(lambda a, b: a+b)
    counts.pprint()

    ssc.start()
    ssc.awaitTermination()
Example #5
import psycopg2

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

dsn_hostname = "localhost"
dsn_port = "5432"
dsn_uid = "postgres"
dsn_pwd = "1234"

sc = SparkContext()
ssc = StreamingContext(sc, 60)

contador = 0

IP_RABBIT = "192.168.0.102"
MQTT_PORT = "1883"

mqttStream = MQTTUtils.createStream(
    ssc,
    "tcp://" + IP_RABBIT + ':' +
    MQTT_PORT,  # Note both port number and protocol
    "qunews/coletor/ceagri"  # The same routing key as used by producer
)


def ConsultarBanco(mac):
    try:
        # dsn_database is defined in the part of the script cut off above
        conn_string = ("host=" + dsn_hostname + " port=" + dsn_port +
                       " dbname=" + dsn_database + " user=" + dsn_uid +
                       " password=" + dsn_pwd)
        print("Connecting to database\n  ->%s" % (conn_string))
        conn = psycopg2.connect(conn_string)
        print("Connected!\n")
    except:
        print("Unable to connect to the database.")
    cursor = conn.cursor()
    cursor.execute(
        """SELECT tt.nome from tipo_pessoamac tpm inner join tipo_pessoa tp on tpm.pessoa_fk_id = tp.id inner join tipo_tipo tt on tp.tiporef_id = tt.id where tpm.mac = """
Example #6
        print('No values received...')
    else:
        mean_var = statistics(rdd)
        kmeans(rdd)
        print('Centroid:')
        print(mean_var[0])
        print('Variance:')
        print(mean_var[1])
        print('Kmeans centers:')
        print(C)
        json_dict = json.dumps({'values': rdd, 'centroid': mean_var[0], 'variance': mean_var[1], 'kmeans_centers': C})
        publish.single("cis/statistics", json_dict, hostname='test.mosquitto.org', port=1883)  # publishes the JSON dict
        print('Json object published....')

brokerUrl = "tcp://brix.d.cs.uoregon.edu:8100"
topic = "cis/soundtest/preprocessed"

mqttStream = MQTTUtils.createStream(ssc, brokerUrl, topic)

batch = mqttStream.map(lambda message: message.split(", ")) \
    .map(lambda x: tuple(float(num) for num in x))

batch.pprint()
batch.foreachRDD(lambda rdd: get_json(rdd.collect()))

publish.single("cis/statistics", json.dumps({'clear': True}), hostname='test.mosquitto.org', port=1883)

ssc.start()
ssc.awaitTermination()
ssc.stop()
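
The helpers statistics(), kmeans() and the variable C are defined above the cut and are not shown. As a rough stand-in, here is a hedged sketch of what a statistics() helper returning a (centroid, variance) pair could look like for the collected list of float tuples, using numpy (the implementation is an assumption, not the original):

import numpy as np

def statistics(samples):
    # Hedged stand-in: per-dimension mean and variance of a list of
    # equal-length float tuples, returned as plain Python lists so they can be
    # serialized with json.dumps() as in the snippet above.
    data = np.array(samples, dtype=float)  # shape (n_samples, n_features)
    centroid = data.mean(axis=0).tolist()
    variance = data.var(axis=0).tolist()
    return centroid, variance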
Example #7
   print " [x] sent %r" % message
   print "*******************"
   #time.sleep(1)

ipServidorRabbit = sys.argv[1]
pX = int(sys.argv[2])
pY = int(sys.argv[3])

x_min = 0
x_max = 52483
deslocamentoY = 33960
y_min = 0
y_max = 67925 # 33965 + 33960

sc = SparkContext()
ssc = StreamingContext(sc, 10)

lines = MQTTUtils.createStream(
   ssc,
   "tcp://%s:1883" % ipServidorRabbit,  
   "hello"                  
)

counts = lines.map(mapear)

counts.pprint()

ssc.start()
ssc.awaitTermination()
ssc.stop()
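
The mapear() function and the producer that feeds the "hello" topic are not shown. Below is a hedged companion producer using pika against RabbitMQ's MQTT plugin, where MQTT topics are routed through the amq.topic exchange (the host and the message format are assumptions):

import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))  # assumed RabbitMQ host
channel = connection.channel()

for i in range(10):
    message = "point %d" % i  # placeholder payload
    # RabbitMQ's MQTT plugin binds MQTT subscriptions to the amq.topic exchange,
    # so a routing key of "hello" reaches the MQTT topic used above.
    channel.basic_publish(exchange="amq.topic", routing_key="hello", body=message)
    print(" [x] sent %r" % message)

connection.close()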
Example #8
# -*- coding: utf-8 -*-
# Execution:
# spark-submit --jars <file.jar> script.py
#
from pyspark import SparkContext, SQLContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

sc = SparkContext(appName="EventStreamReader")
sqlc = SQLContext(sc)
ssc = StreamingContext(sc, 4)
ssc.checkpoint("/tmp/spark-streaming-checkpoints")

brokerUrl = 'tcp://localhost:1883'
friendships = MQTTUtils.createStream(ssc, brokerUrl, "friendships")
comments = MQTTUtils.createStream(ssc, brokerUrl, "comments")
likes = MQTTUtils.createStream(ssc, brokerUrl, "likes")
posts = MQTTUtils.createStream(ssc, brokerUrl, "posts")

posts_list = []
event_update_list = []


def update_function(new_value, last_value):
    event = last_value

    if new_value:
        event = new_value

        if last_value:
            for element in last_value:
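
update_function() is cut off mid-loop and the snippet never shows how it is attached to a stream. Here is a hedged sketch of the usual wiring with updateStateByKey(), assuming the MQTT payloads are JSON and keying on a hypothetical "post_id" field (both assumptions):

import json

def update_events(new_values, last_state):
    # Hedged sketch: accumulate every event seen so far for a given key.
    state = last_state or []
    state.extend(new_values)
    return state

keyed_comments = comments.map(json.loads).map(lambda c: (c["post_id"], c))  # "post_id" is hypothetical
comment_state = keyed_comments.updateStateByKey(update_events)
comment_state.pprint()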
Example #9
                                                  expArrival=exp,
                                                  delayed=True,
                                                  delay=delay)
            else:
                state[trainNumber] = dict(stationId=stationId,
                                          expArrival=exp,
                                          delayed=False,
                                          delay=0)
    return state


sc = SparkContext(appName="TFLStreaming")
ssc = StreamingContext(sc, 5)  # batch interval 5 sec
ssc.checkpoint(cpDir)

lines = MQTTUtils.createStream(ssc, brokerUrl, listenTopic)
windowed = lines.window(
    600,
    5)  # look at the last 10 minutes worth with a sliding window of 5 seconds
dicts = lines.map(
    lambda js: json.loads(js))  # convert from json into a Python dict
mapped = dicts.map(lambda d:
                   (d['trainNumber'], d))  # make the train number the key
ds = mapped.updateStateByKey(update)  # compare against previous data
info = ds.filter(lambda (r, d): bool(d))  # ignore if there is no previous data
# the state from the update is a dict (train -> info)
# this is then mapped with a key so we have (train, (train->info))
# so let's get rid of the redundancy
unpack = info.map(lambda (r, d): (r, d[r]))
# now let's swap this over so that the key is whether the train is delayed or not, and assign a count
remap = unpack.map(lambda (r, d): ('delayed', 1)
Example #10
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

sc = SparkContext()
ssc = StreamingContext(sc, 10)

mqttStream = MQTTUtils.createStream(
    ssc,
    "tcp://localhost:1883",  # Note both port number and protocol
    "hello"  # The same routing key as used by producer
)
#mqttStream.count().pprint()
mqttStream.pprint()
ssc.start()
ssc.awaitTermination()
ssc.stop()
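
The commented-out count() line would report how many messages arrived in each 10-second batch. A hedged variant below counts over a sliding window instead; the 60-second window and 10-second slide are arbitrary choices (both must be multiples of the batch interval), and these lines would have to go before ssc.start():

windowed_counts = mqttStream.window(60, 10).count()  # messages in the last 60 s, updated every 10 s
windowed_counts.pprint()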
Example #11
                else:
                    delay = exp - old['expArrival']
                    delay = delay.seconds
                    if delay > 60:  # anything less than a minute is not "delayed"
                        state[trainNumber] = dict(stationId=stationId, expArrival=exp, delayed=True, delay=delay)
            else:
                state[trainNumber] = dict(stationId=stationId, expArrival=exp, delayed=False, delay=0)
    return state



sc = SparkContext(appName="TFLStreaming")
ssc = StreamingContext(sc, 5) # batch interval 5 sec
ssc.checkpoint(cpDir)

lines = MQTTUtils.createStream(ssc, brokerUrl, listenTopic)
windowed = lines.window(600,5) # look at the last 10 minutes worth with a sliding window of 5 seconds

dicts = lines.map(lambda js: json.loads(js)) # convert from json into a Python dict
mapped = dicts.map(lambda d: (d['trainNumber'],d)) # make the train number the key
ds = mapped.updateStateByKey(update) # compare against previous data
info = ds.filter(lambda (r, d): bool(d)) # ignore if there is no previous data
# the state from the update is a dict (train -> info)
# this is then mapped with a key so we have (train, (train->info))
# so let's get rid of the redundancy
unpack = info.map(lambda (r, d): (r, d[r]))
# now let's swap this over so that the key is whether the train is delayed or not, and assign a count

ontime = unpack.filter(lambda (r,d): not d['delayed'])
remap = ontime.map(lambda (r,d): (d['stationId'],1))
#now let's count the results with a reducer
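
The fragment stops at the comment announcing the reducer. A hedged completion of that last step: sum the (stationId, 1) pairs per station, print them each batch, and start the context as the other examples do (the original's actual output handling is not shown):

station_counts = remap.reduceByKey(lambda a, b: a + b)  # on-time arrivals per station
station_counts.pprint()

ssc.start()
ssc.awaitTermination()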
Example #12
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils
import pika


sc = SparkContext("local[5]", "Jesus Christ that's Jason Bourne")
ssc = StreamingContext(sc, 1)
ssc.checkpoint("/tmp/spark-streaming-checkpoints")

parameters = pika.ConnectionParameters(host='localhost', port=5672)
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
broker_url = 'tcp://localhost:1883'
exchange_name = "amq.topic"
queue_name = "SPARK_POST_STRUCTURES"

post_structures = MQTTUtils.createStream(ssc, broker_url, queue_name)
sep = ">>"


def push_scores_to_queue(time, rdd):
    print("======{}======".format(time))
    elements = None

    if rdd.isEmpty():
        print("-EMPTY-")
    else:
        elements = rdd.map(lambda pair: (str(pair[0]), str(pair[1]))).collect()

        content = []
        for e in elements:
            print(e)
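
push_scores_to_queue() is cut off after collecting the pairs, so the snippet never shows them being sent anywhere. A hedged sketch of pushing the collected pairs back to RabbitMQ with the pika channel opened above; the routing key and the sep-joined message format are assumptions:

def publish_scores(elements):
    # Hedged sketch: one message per (key, value) pair, joined with the `sep`
    # marker defined above. "SPARK_SCORES" is a hypothetical routing key.
    for key, value in elements:
        channel.basic_publish(exchange=exchange_name,
                              routing_key="SPARK_SCORES",
                              body=key + sep + value)

In the original layout this would presumably be called at the end of push_scores_to_queue(), with post_structures.foreachRDD(push_scores_to_queue) and ssc.start() wiring the stream up before the job runs.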
Example #13
import operator

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

sc = SparkContext(appName="TemperatureHistory")
ssc = StreamingContext(sc, 1)
ssc.checkpoint("checkpoint")

# broker URI
brokerUrl = "tcp://192.168.2.26:1883" # "tcp://iot.eclipse.org:1883"
# topic or topic pattern where temperature data is being sent
topic = "+/+/sensors/temperature"

mqttStream = MQTTUtils.createStream(ssc, brokerUrl, topic)

counts = mqttStream \
    .filter(lambda message: is_number(message)) \
    .map(lambda message: (round(float(message) * 2, 0) / 2, 1)) \
    .reduceByKeyAndWindow(operator.add, operator.sub, 15, 1) \
    .transform(lambda rdd: rdd.sortByKey())


def printHistogram(time, rdd):
    c = rdd.collect()
    print("-------------------------------------------")
    print("Time: %s" % time)
    print("-------------------------------------------")
    for record in c:
    	# "draw" our lil' ASCII-based histogram