# Tail of a commented-out state-merge helper (its def line is truncated in
# the source): it builds a change record from one parsed message, folds it
# into the running per-asset state, and returns the state keyed by oid.
"""
        change_dict = {
            "asset_id": line["asset_id"],
            "attr_name": line["attr_name"],
            "collect_time": line["collect_time"],
            "attr_id": line["attr_id"],
            "value": line["value"],
            "warring_name": line["warring_name"],
            "warring_state": line["warring_state"]
        }
        asset_attr_list.update(change_dict)
    return line["oid"], asset_attr_list
"""

import json

from pyspark.streaming.kafka import KafkaUtils

if __name__ == "__main__":
    checkpoint_path = "hdfs://localhost:9000/checkpiont/streaming_cp_log"
    kafka_topic_list = ["realdata_receive"]
    broker_list_dit = {"metadata.broker.list": "101.200.194.191:9092"}

    setDefaultEncoding()
    ssc = initStreamingContext("streaming_kafka_deltaT", "local[2]", 5)
    ssc.checkpoint(checkpoint_path)

    kvs = KafkaUtils.createDirectStream(ssc, kafka_topic_list, broker_list_dit)
    deltaT = kvs.flatMap(lambda lines: toJson(lines)).map(lambda x: (x["oid"], x)).\
        updateStateByKey(updateFun).foreachRDD(foreachPartitionFun)

    ensureOffset(kvs=kvs)

    ssc.start()
    ssc.awaitTermination()
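
# Note: KafkaUtils.createDirectStream is the receiver-less Kafka 0.8 direct
# API from pyspark.streaming.kafka; it was removed in Spark 3.x, so both
# examples here target Spark 1.x/2.x. The checkpoint directory set above is
# what lets updateStateByKey recover its per-oid state after a driver restart.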
def sumFun(lines):
    # Map callback: decode one Kafka message and emit its numeric value
    # under a single shared key so reduceByKey can sum the whole batch.
    line = json.loads(lines[1].encode("UTF-8"))
    return ("value", line["value"])


count = 0  # module-level counter; reduceFun mutates it per offset range


def reduceFun(rdd):
    # foreachRDD callback: count the Kafka offset ranges seen so far.
    global count
    for offset_range in rdd.offsetRanges():
        count += 1
        print("count================" + str(count))

if __name__ == "__main__":
    setDefaultEncoding()
    ssc = initStreamingContext("streaming_kafka_avg", "local[2]", 5)
    ssc.checkpoint("hdfs://localhost:9000/checkpiont/streaming_cp_log")

    kvs = KafkaUtils.createDirectStream(ssc, ["realdata_receive"], {"metadata.broker.list": "192.168.108.222:9092"})
    kvs.map(lambda value: sumFun(value)).reduceByKey(reduceFun).\
        updateStateByKey(updateFun).pprint()

    kvs.foreachRDD(reduceFun)

    ensureOffset(kvs=kvs)

    ssc.start()
    ssc.awaitTermination()
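
# A typical launch for either script, assuming Spark 2.x built for Scala 2.11
# (adjust the artifact version to the local Spark build; the script name is
# hypothetical):
#   spark-submit \
#       --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.0 \
#       streaming_kafka_example.py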

