def main():
    elaticIP = "10.0.0.204"
    elaticPort = 9200
    url = "http://{}:{}/machan_laser/_search".format(elaticIP, elaticPort)

    conf = SparkConf()
    # conf = initSparkConfig(conf, "local", appName="StreamingKMeans")
    conf = initSparkConfig(conf, "spark://10.0.0.202:7077", appName="StreamingKMeans")
    sc = SparkContext(conf=conf)
    sc.setLogLevel("WARN")
    ssc = StreamingContext(sc, 1)
    mqttData = MQTTUtils.createStream(ssc, brokerIP, topic)

    programTime = []
    programName = getInitData(url)
    changeCount = 0
    for i in programName:
        subTime = getProcessTime(url, i)
        subTime.append(i)
        programTime.append(subTime)
    print(programTime)

    rProgramName = sc.parallelize(programTime)
    trainData = rProgramName.map(toVector)
    trainData.take(2)
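# The helpers used above (initSparkConfig, getInitData, getProcessTime, toVector) and the
# module-level brokerIP/topic come from the original project and are not shown here. A
# minimal sketch of what toVector might look like, assuming each programTime entry is a
# list of numeric process times with the program name appended as the last element:
from pyspark.mllib.linalg import Vectors

def toVector(row):
    # Drop the trailing program name and keep only the numeric features.
    return Vectors.dense([float(x) for x in row[:-1]])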
def _startContext(self, topic):
    # Start the StreamingContext and also collect the result
    stream = MQTTUtils.createStream(self.ssc, "tcp://" + self._MQTTTestUtils.brokerUri(), topic)
    result = []

    def getOutput(_, rdd):
        for data in rdd.collect():
            result.append(data)

    stream.foreachRDD(getOutput)
    self.ssc.start()
    return result
def main():
    conf = SparkConf()
    conf = initSparkConfig(conf, "local[3]", appName="mqttTest")
    # conf = initSparkConfig(conf, "spark://10.0.0.202:7077", appName="StreamingKMeans")
    sc = SparkContext(conf=conf)
    sc.setLogLevel("WARN")
    ssc = StreamingContext(sc, 3)

    mqttData = MQTTUtils.createStream(ssc, brokerIP, topic)
    r = mqttData.map(lambda x: x + "aa")
    r.pprint()

    ssc.start()
    ssc.awaitTermination()
def _start_context_with_paired_stream(self, topics):
    stream = MQTTUtils.createPairedStream(
        self.ssc, "tcp://" + self._MQTTTestUtils.brokerUri(), topics)

    # Keep a set because records can potentially be repeated.
    result = set()

    def getOutput(_, rdd):
        for data in rdd.collect():
            result.add(data)

    stream.foreachRDD(getOutput)
    self.ssc.start()
    return result
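# createPairedStream subscribes to several topics at once; the test above collects results
# into a set because the same record can be delivered more than once. A minimal sketch of
# consuming the paired stream outside the test harness, assuming each record arrives as a
# (topic, payload) pair and that ssc and brokerUrl already exist:
pairs = MQTTUtils.createPairedStream(ssc, brokerUrl, ["sensors/a", "sensors/b"])
perTopicCounts = pairs.map(lambda record: (record[0], 1)).reduceByKey(lambda a, b: a + b)
perTopicCounts.pprint()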
def main():
    with SparkContext(appName='MQTTstreaming') as sc:
        ssc = StreamingContext(sc, 2)

        broker = 'tcp://' + sys.argv[1]
        topic = sys.argv[2]

        dataInput = MQTTUtils.createStream(ssc, broker, topic)
        result = dataInput.map(lambda x: x.split(" ")).map(load).map(convert)
        result.foreachRDD(lambda x: x.foreach(saveToEs))
        result.pprint()

        ssc.start()
        ssc.awaitTermination()
        ssc.stop()
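# load, convert and saveToEs are project-specific helpers that are not shown here. A minimal
# sketch of what saveToEs might look like, assuming the standard `elasticsearch` Python client
# and a hypothetical index name "mqtt_events"; the client is created inside the function
# because it runs on the executors, so a shared client object cannot be pickled and reused:
from elasticsearch import Elasticsearch

def saveToEs(doc):
    es = Elasticsearch(["http://localhost:9200"])  # assumed cluster address
    es.index(index="mqtt_events", document=doc)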
# # array_atual = m.calcular_total_de_pessoas_por_lugar_porcentagem(total_de_pessoas_3_locais,
# #                                                                 total_de_pessoas_em_cada_local)
# print(array_atual)

# SPARK ALGORITHM
if __name__ == "__main__":
    sc = SparkContext()
    ssc = StreamingContext(sc, 60)
    brokerUrl = "tcp://localhost:1883"
    topic1 = "hall"
    topic2 = "praca"

    lines_hall = MQTTUtils.createStream(ssc, brokerUrl, topic1)
    # lines_hall.pprint(100)

    # Split each line into macs
    # macs = lines_hall.flatMap(lambda line: line.split(",")[1::3])
    # macs.pprint(100)
    # dic_hall = lines_hall.flatMap(lambda line: line.split(","))
    # macs.pprint(100)

    dic_hall = {}
    keyPadrao = 'geral'
    # values = lines_hall.map(lambda line: line.split(","))
    values = lines_hall.map(lambda line: [keyPadrao, [json.loads(line)]])
    # .map(lambda k, v: dic_hall.update({k: v}), keys)
    # junk = map(lambda k, v: dic_hall.update({k: v}), keys, values[:])
    `$ bin/run-example \
      streaming-mqtt/examples/src/main/python/streaming/mqtt_wordcount.py tcp://localhost:1883 foo`
"""
import sys

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from mqtt import MQTTUtils

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: mqtt_wordcount.py <broker url> <topic>", file=sys.stderr)
        exit(-1)

    sc = SparkContext(appName="PythonStreamingMQTTWordCount")
    ssc = StreamingContext(sc, 1)
    brokerUrl = sys.argv[1]
    topic = sys.argv[2]

    lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
    counts = lines.flatMap(lambda line: line.split(" ")) \
        .map(lambda word: (word, 1)) \
        .reduceByKey(lambda a, b: a + b)
    counts.pprint()

    ssc.start()
    ssc.awaitTermination()
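# To feed the word count above during local testing, one option is to publish a few messages
# with the paho-mqtt client (a separate package, not part of this example); the broker URL
# and topic mirror the run command shown in the docstring:
import paho.mqtt.publish as publish

publish.single("foo", "hello mqtt streaming hello", hostname="localhost", port=1883)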
def getTimeQtd(tp1, tp2):
    ts = []
    ts.append(tp1[0])
    ts.append(tp2[0])
    return (max(ts), int(tp1[1]) + int(tp2[1]))


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: mqtt_wordcount.py <broker url> <topic>", file=sys.stderr)
        exit(-1)

    sc = SparkContext(appName="PythonStreamingMQTTWordCount")
    ssc = StreamingContext(sc, 120)
    brokerUrl = sys.argv[1]
    topic = sys.argv[2]
    print(brokerUrl)
    print(topic)

    lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
    counts = lines.map(filtro)
    windowedWordCounts = counts.reduceByKeyAndWindow(agrupaTS, None, 2760, 120)
    windowedWordCounts = windowedWordCounts.map(getTimeMac)
    windowedWordCounts = windowedWordCounts.filter(limpeza)
    windowedWordCounts = windowedWordCounts.reduce(getTimeQtd)
    windowedWordCounts = windowedWordCounts.map(contar)
    windowedWordCounts.pprint()

    ssc.start()
    ssc.awaitTermination()
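# The window (2760 s) and slide (120 s) above are both multiples of the 120 s batch interval,
# as Spark Streaming requires. Passing None as the inverse function makes Spark recompute each
# window from scratch; a sketch of the incremental variant, assuming a hypothetical agrupaTSInv
# that undoes agrupaTS (not part of the original code) and noting that incremental windowing
# requires checkpointing:
ssc.checkpoint("/tmp/mqtt-wordcount-checkpoint")
windowedWordCounts = counts.reduceByKeyAndWindow(agrupaTS, agrupaTSInv, 2760, 120)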
            (time() * 1000) % 1000, interval.value, len(input)))
    if last_batch:
        ssc.stop()

# Initialize features to <number of sensors>-length array, filled with neutral initial sensor value
features = np.zeros(n_sensors)
features.fill(0.5)

# Initialize streaming for specified reporting interval
sc = SparkContext(appName="iotstream_lr_mqtt")
interval = sc.accumulator(0)
empty_intervals = sc.accumulator(0)
events = sc.accumulator(0)
ssc = StreamingContext(sc, reporting_interval)
sensor_stream = MQTTUtils.createStream(ssc, mqtt_URL, mqtt_topic)

# Load pre-computed model
model = LogisticRegressionModel.load(sc, modelname)

# Run model on each batch
# sensor_stream.pprint(10)
sensor_stream.foreachRDD(run_model)

# Start reading streaming data
ssc.start()
start_time = time()
ssc.awaitTermination()
finish_time = time()
elapsed_time = finish_time - start_time - empty_intervals.value * reporting_interval - 1.5  # Subtract off time waiting for events and 1.5 sec for termination
print(
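# LogisticRegressionModel.load above expects a model previously written with model.save(). A
# minimal sketch of how `modelname` might have been produced, assuming training data is
# available as an RDD of LabeledPoint whose feature layout matches the n_sensors-length
# vectors scored by run_model:
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.regression import LabeledPoint

training = sc.parallelize([LabeledPoint(0.0, [0.5] * n_sensors),
                           LabeledPoint(1.0, [0.9] * n_sensors)])
trained = LogisticRegressionWithLBFGS.train(training)
trained.save(sc, modelname)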