change_dict = { "asset_id": line["asset_id"], "attr_name": line["attr_name"], "collect_time": line["collect_time"], "attr_id": line["attr_id"], "value": line["value"], "warring_name": line["warring_name"], "warring_state": line["warring_state"] } asset_attr_list.update(change_dict) return line["oid"], asset_attr_list """ if __name__ == "__main__": checkpoint_path = "hdfs://localhost:9000/checkpiont/streaming_cp_log" kafka_topic_list = ["realdata_receive"] broker_list_dit = {"metadata.broker.list": "101.200.194.191:9092"} setDefaultEncoding() ssc = initStreamingContext("streaming_kafka_deltaT", "local[2]", 5) ssc.checkpoint(checkpoint_path) kvs = KafkaUtils.createDirectStream(ssc, kafka_topic_list, broker_list_dit) deltaT = kvs.flatMap(lambda lines: toJson(lines)).map(lambda x: (x["oid"], x)).\ updateStateByKey(updateFun).foreachRDD(foreachPartitionFun) ensureOffset(kvs=kvs) ssc.start() ssc.awaitTermination()
def sumFun(lines):
    """Map a raw Kafka (key, payload) message to a ("value", reading) pair.

    `lines` is the (key, json_string) tuple emitted by the direct Kafka
    stream; only the payload's "value" field is kept, under the constant key
    "value", so a downstream reduceByKey can sum every reading together.
    """
    line = json.loads(lines[1].encode("UTF-8"))
    return ("value", line["value"])


def reduceFun(rdds):
    """foreachRDD callback: bump the global counter once per Kafka offset
    range in the batch and print the running total.

    NOTE(review): this is a driver-side per-RDD callback taking ONE argument.
    It is NOT a binary reduce function and must never be handed to
    reduceByKey (see the fix in the main block below).
    """
    global count
    for rdd in rdds.offsetRanges():
        count += 1
    # Single-argument call form prints identically under Python 2 and 3.
    print("count================" + str(count))


if __name__ == "__main__":
    setDefaultEncoding()
    ssc = initStreamingContext("streaming_kafka_avg", "local[2]", 5)
    # NOTE(review): "checkpiont" kept as-is — it is a live HDFS path; renaming
    # would abandon the existing checkpoint directory.
    ssc.checkpoint("hdfs://localhost:9000/checkpiont/streaming_cp_log")
    kvs = KafkaUtils.createDirectStream(
        ssc, ["realdata_receive"],
        {"metadata.broker.list": "192.168.108.222:9092"})
    # BUG FIX: the original passed reduceFun to reduceByKey. reduceByKey calls
    # its function with TWO partial values, but reduceFun takes one RDD and
    # calls .offsetRanges() on it — that would fail at runtime. Sum the
    # readings with a real binary reducer instead.
    kvs.map(sumFun).reduceByKey(lambda a, b: a + b) \
        .updateStateByKey(updateFun).pprint()
    kvs.foreachRDD(reduceFun)
    ensureOffset(kvs=kvs)
    ssc.start()
    ssc.awaitTermination()
line = json.loads(lines[1].encode("UTF-8")) #print type(line["oid"].encode("utf-8")) # return {"oid":line["oid"].encode("utf-8"),"value":line["value"]} return ("value", line["value"]) def reduceFun(rdds): global count for rdd in rdds.offsetRanges(): count += 1 print "count================" + str(count) if __name__ == "__main__": setDefaultEncoding() ssc = initStreamingContext("streaming_kafka_avg", "local[2]", 5) ssc.checkpoint("hdfs://localhost:9000/checkpiont/streaming_cp_log") kvs = KafkaUtils.createDirectStream( ssc, ["realdata_receive"], {"metadata.broker.list": "192.168.108.222:9092"}) kvs.map(lambda value: sumFun(value)).reduceByKey(reduceFun).\ updateStateByKey(updateFun).pprint() kvs.foreachRDD(reduceFun) ensureOffset(kvs=kvs) ssc.start() ssc.awaitTermination()