def _startContext(self, topic):
    # Start the StreamingContext and also collect the result
    stream = MQTTUtils.createStream(self.ssc, "tcp://" + self._MQTTTestUtils.brokerUri(), topic)
    result = []

    def getOutput(_, rdd):
        for data in rdd.collect():
            result.append(data)

    stream.foreachRDD(getOutput)
    self.ssc.start()
    return result
    `$ bin/spark-submit --jars external/mqtt-assembly/target/scala-*/\
      spark-streaming-mqtt-assembly-*.jar examples/src/main/python/streaming/mqtt_wordcount.py \
      tcp://localhost:1883 foo`
"""
import sys

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print >> sys.stderr, "Usage: mqtt_wordcount.py <broker url> <topic>"
        exit(-1)

    sc = SparkContext(appName="PythonStreamingMQTTWordCount")
    ssc = StreamingContext(sc, 1)

    brokerUrl = sys.argv[1]
    topic = sys.argv[2]

    lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
    counts = lines.flatMap(lambda line: line.split(" ")) \
        .map(lambda word: (word, 1)) \
        .reduceByKey(lambda a, b: a + b)
    counts.pprint()

    ssc.start()
    ssc.awaitTermination()
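# A minimal companion sketch (an assumption, not part of the Spark example):
# a paho-mqtt producer that feeds whitespace-separated lines into the demo's
# "foo" topic on a local broker at tcp://localhost:1883.
import time

import paho.mqtt.publish as publish

sentences = ["hello spark streaming", "hello mqtt"]
for sentence in sentences:
    # one line per MQTT message, matching line.split(" ") in the word count
    publish.single("foo", sentence, hostname="localhost", port=1883)
    time.sleep(1)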
dsn_hostname = "localhost"
dsn_port = "5432"
dsn_uid = "postgres"
dsn_pwd = "1234"

sc = SparkContext()
ssc = StreamingContext(sc, 60)
contador = 0

IP_RABBIT = "192.168.0.102"
MQTT_PORT = "1883"

mqttStream = MQTTUtils.createStream(
    ssc,
    "tcp://" + IP_RABBIT + ':' + MQTT_PORT,  # Note both port number and protocol
    "qunews/coletor/ceagri"  # The same routing key as used by producer
)


def ConsultarBanco(mac):
    # Look up the person type registered for a given MAC address.
    # dsn_database is defined elsewhere in the original script.
    try:
        conn_string = ("host=" + dsn_hostname + " port=" + dsn_port +
                       " dbname=" + dsn_database +
                       " user=" + dsn_uid + " password=" + dsn_pwd)
        print("Connecting to database\n    ->%s" % (conn_string))
        conn = psycopg2.connect(conn_string)
        print("Connected!\n")
    except:
        print("Unable to connect to the database.")
    cursor = conn.cursor()
    cursor.execute(
        """SELECT tt.nome from tipo_pessoamac tpm
           inner join tipo_pessoa tp on tpm.pessoa_fk_id = tp.id
           inner join tipo_tipo tt on tp.tiporef_id = tt.id
           where tpm.mac = """
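# A sketch of the same MAC lookup as a parameterized psycopg2 query instead of
# string concatenation; table and column names come from the snippet above,
# the helper name and the rest are assumptions.
def consultar_banco_parametrizado(conn, mac):
    cursor = conn.cursor()
    # psycopg2 binds the MAC value itself, so no quoting or concatenation is needed
    cursor.execute(
        """SELECT tt.nome
           FROM tipo_pessoamac tpm
           INNER JOIN tipo_pessoa tp ON tpm.pessoa_fk_id = tp.id
           INNER JOIN tipo_tipo tt ON tp.tiporef_id = tt.id
           WHERE tpm.mac = %s""",
        (mac,))
    return cursor.fetchone()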
        print('No values received...')
    else:
        mean_var = statistics(rdd)
        kmeans(rdd)
        print('Centroid:')
        print(mean_var[0])
        print('Variance:')
        print(mean_var[1])
        print('Kmeans centers:')
        print(C)
        json_dict = json.dumps({'values': rdd,
                                'centroid': mean_var[0],
                                'variance': mean_var[1],
                                'kmeans_centers': C})
        # Publishes json dict
        publish.single("cis/statistics", json_dict,
                       hostname='test.mosquitto.org', port=1883)
        print('Json object published....')


brokerUrl = "tcp://brix.d.cs.uoregon.edu:8100"
topic = "cis/soundtest/preprocessed"

mqttStream = MQTTUtils.createStream(ssc, brokerUrl, topic)

batch = mqttStream.map(lambda message: message.split(", ")) \
    .map(lambda x: tuple(map(lambda num: float(num), x)))
batch.pprint()
batch.foreachRDD(lambda rdd: get_json(rdd.collect()))

publish.single("cis/statistics", json.dumps({'clear': True}),
               hostname='test.mosquitto.org', port=1883)

ssc.start()
ssc.awaitTermination()
ssc.stop()
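# A minimal sketch (an assumption) of a one-shot consumer for the statistics
# the job above publishes, using paho-mqtt against the same test.mosquitto.org
# broker; the topic and field names come from the snippet.
import json

import paho.mqtt.subscribe as subscribe

msg = subscribe.simple("cis/statistics", hostname="test.mosquitto.org", port=1883)
stats = json.loads(msg.payload.decode("utf-8"))
print(stats.get("centroid"), stats.get("variance"), stats.get("kmeans_centers"))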
print " [x] sent %r" % message print "*******************" #time.sleep(1) ipServidorRabbit = sys.argv[1] pX = int(sys.argv[2]) pY = int(sys.argv[3]) x_min = 0 x_max = 52483 deslocamentoY = 33960 y_min = 0 y_max = 67925 # 33965 + 33960 sc = SparkContext() ssc = StreamingContext(sc, 10) lines = MQTTUtils.createStream( ssc, "tcp://%s:1883" % ipServidorRabbit, "hello" ) counts = lines.map(mapear) counts.pprint() ssc.start() ssc.awaitTermination() ssc.stop()
# -*- coding: utf-8 -*-
#
# Run with:
#   spark-submit --jars <arquivo.jar> script.py
#
from pyspark import SparkContext, SQLContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

sc = SparkContext(appName="EventStreamReader")
sqlc = SQLContext(sc)
ssc = StreamingContext(sc, 4)
ssc.checkpoint("/tmp/spark-streaming-checkpoints")

brokerUrl = 'tcp://localhost:1883'

friendships = MQTTUtils.createStream(ssc, brokerUrl, "friendships")
comments = MQTTUtils.createStream(ssc, brokerUrl, "comments")
likes = MQTTUtils.createStream(ssc, brokerUrl, "likes")
posts = MQTTUtils.createStream(ssc, brokerUrl, "posts")

posts_list = []
event_update_list = []


def update_function(new_value, last_value):
    event = last_value
    if new_value:
        event = new_value
    if last_value:
        for element in last_value:
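# A minimal sketch (an assumption, separate from the truncated update_function
# above) of attaching stateful counting to one of these streams with
# updateStateByKey; keying by the raw message payload is illustrative only.
comment_counts = comments.map(lambda msg: (msg, 1)) \
    .updateStateByKey(lambda new_values, total: (total or 0) + sum(new_values))
comment_counts.pprint()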
                                  expArrival=exp,
                                  delayed=True,
                                  delay=delay)
    else:
        state[trainNumber] = dict(stationId=stationId,
                                  expArrival=exp,
                                  delayed=False,
                                  delay=0)
    return state


sc = SparkContext(appName="TFLStreaming")
ssc = StreamingContext(sc, 5)  # batch interval 5 sec
ssc.checkpoint(cpDir)

lines = MQTTUtils.createStream(ssc, brokerUrl, listenTopic)

windowed = lines.window(600, 5)  # look at the last 10 minutes worth with a sliding window of 5 seconds
dicts = lines.map(lambda js: json.loads(js))  # convert from json into a Python dict
mapped = dicts.map(lambda d: (d['trainNumber'], d))  # make the train number the key
ds = mapped.updateStateByKey(update)  # compare against previous data
info = ds.filter(lambda (r, d): bool(d))  # ignore if there is no previous data

# the state from the update is a dict (train -> info)
# this is then mapped with a key so we have (train, (train->info))
# so let's get rid of the redundancy
unpack = info.map(lambda (r, d): (r, d[r]))

# now let's swap this over so that the key is whether the train is delayed or not, and assign a count
remap = unpack.map(lambda (r, d): ('delayed', 1)
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

sc = SparkContext()
ssc = StreamingContext(sc, 10)

mqttStream = MQTTUtils.createStream(
    ssc,
    "tcp://localhost:1883",  # Note both port number and protocol
    "hello"                  # The same routing key as used by producer
)

# mqttStream.count().pprint()
mqttStream.pprint()

ssc.start()
ssc.awaitTermination()
ssc.stop()
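# A sketch of a windowed variant of the commented-out count above (an
# assumption, not part of the original snippet); it would have to be added
# before ssc.start(). Every 10 seconds, report how many messages arrived in
# the last 30 seconds.
windowed_counts = mqttStream.window(30, 10).count()
windowed_counts.pprint()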
    else:
        delay = exp - old['expArrival']
        delay = delay.seconds
        if (delay > 60):  # anything less than a minute is not "delayed"
            state[trainNumber] = dict(stationId=stationId,
                                      expArrival=exp,
                                      delayed=True,
                                      delay=delay)
        else:
            state[trainNumber] = dict(stationId=stationId,
                                      expArrival=exp,
                                      delayed=False,
                                      delay=0)
    return state


sc = SparkContext(appName="TFLStreaming")
ssc = StreamingContext(sc, 5)  # batch interval 5 sec
ssc.checkpoint(cpDir)

lines = MQTTUtils.createStream(ssc, brokerUrl, listenTopic)

windowed = lines.window(600, 5)  # look at the last 10 minutes worth with a sliding window of 5 seconds
dicts = lines.map(lambda js: json.loads(js))  # convert from json into a Python dict
mapped = dicts.map(lambda d: (d['trainNumber'], d))  # make the train number the key
ds = mapped.updateStateByKey(update)  # compare against previous data
info = ds.filter(lambda (r, d): bool(d))  # ignore if there is no previous data

# the state from the update is a dict (train -> info)
# this is then mapped with a key so we have (train, (train->info))
# so let's get rid of the redundancy
unpack = info.map(lambda (r, d): (r, d[r]))

# now let's swap this over so that the key is whether the train is delayed or not, and assign a count
ontime = unpack.filter(lambda (r, d): not d['delayed'])
remap = ontime.map(lambda (r, d): (d['stationId'], 1))

# now let's count the results with a reducer
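# A plausible continuation of the final comment above (a sketch, not the
# original code): count on-time arrivals per station and print each batch.
counts = remap.reduceByKey(lambda a, b: a + b)
counts.pprint()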
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

import pika

sc = SparkContext("local[5]", "Jesus Christ that's Jason Bourne")
ssc = StreamingContext(sc, 1)
ssc.checkpoint("/tmp/spark-streaming-checkpoints")

parameters = pika.ConnectionParameters(host='localhost', port=5672)
connection = pika.BlockingConnection(parameters)
channel = connection.channel()

broker_url = 'tcp://localhost:1883'
exchange_name = "amq.topic"
queue_name = "SPARK_POST_STRUCTURES"

post_structures = MQTTUtils.createStream(ssc, broker_url, queue_name)

sep = ">>"


def push_scores_to_queue(time, rdd):
    print("======{}======".format(time))
    elements = None
    if rdd.isEmpty():
        print("-EMPTY-")
    else:
        elements = rdd.map(lambda pair: (str(pair[0]), str(pair[1]))).collect()
        content = []
        for e in elements:
            print(e)
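# A sketch (an assumption, not the original continuation): after collecting
# the (key, value) pairs, they could be published back to RabbitMQ through the
# pika channel declared above. The "SPARK_SCORES" routing key is hypothetical.
def publish_scores(pairs):
    for key, value in pairs:
        channel.basic_publish(exchange=exchange_name,
                              routing_key="SPARK_SCORES",
                              body="{}{}{}".format(key, sep, value))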
import operator

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.mqtt import MQTTUtils

sc = SparkContext(appName="TemperatureHistory")
ssc = StreamingContext(sc, 1)
ssc.checkpoint("checkpoint")

# broker URI
brokerUrl = "tcp://192.168.2.26:1883"  # "tcp://iot.eclipse.org:1883"
# topic or topic pattern where temperature data is being sent
topic = "+/+/sensors/temperature"

mqttStream = MQTTUtils.createStream(ssc, brokerUrl, topic)

counts = mqttStream \
    .filter(lambda message: is_number(message)) \
    .map(lambda message: (round(float(message) * 2, 0) / 2, 1)) \
    .reduceByKeyAndWindow(operator.add, operator.sub, 15, 1) \
    .transform(lambda rdd: rdd.sortByKey())


def printHistogram(time, rdd):
    c = rdd.collect()
    print("-------------------------------------------")
    print("Time: %s" % time)
    print("-------------------------------------------")
    for record in c:
        # "draw" our lil' ASCII-based histogram
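# is_number() is referenced above but not shown in this excerpt; a typical
# implementation (an assumption here) simply attempts a float conversion.
def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        return False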