Beispiel #1
0
 def ProcessIndustry(self, file):
     """Import industrial water-use records from a CSV file.

     Each row whose ``Status`` column is ``"2"`` and whose
     ``IndustrialWaterConsumption`` does not parse to zero becomes one
     document in the ``waterUseIndustry`` collection, keyed by
     ``Year_County_Category``. Rows whose ``_id`` is already stored are
     left untouched.

     Args:
         file: Path of the UTF-8 encoded CSV file to read.

     Returns:
         None. Failures are printed with a traceback instead of being
         raised.
     """
     print("process file %s" % file)
     try:
         with open(file, "r", encoding="utf8") as f:
             for row in csv.DictReader(f):
                 if row["Status"] != "2":
                     continue
                 # csv.DictReader yields strings, so the raw cell can never
                 # equal the int 0; parse it first so that zero-consumption
                 # rows are really skipped.
                 # NOTE(review): assumes util.ToFloat("0") returns 0.0 -
                 # confirm against util.
                 consumption = util.ToFloat(row["IndustrialWaterConsumption"])
                 if consumption == 0:
                     continue
                 data = {}
                 data["_id"] = row["Year"] + "_" + row[
                     "County"] + "_" + row["Category"]
                 data["Area"] = row["Area"]
                 data["Category"] = row["Category"]
                 data["County"] = row["County"]
                 data["IndustrialWaterConsumption"] = consumption
                 data["IndustryArea"] = util.ToFloat(row["IndustryArea"])
                 data["Status"] = row["Status"]
                 data["Year"] = util.ToInt(row["Year"])
                 # Insert only when no document with this _id exists yet.
                 collection = self.db["waterUseIndustry"]
                 if collection.find_one(data["_id"]) is None:
                     collection.insert_one(data)
     except Exception:
         # Best-effort import: report the failure and return, but let
         # KeyboardInterrupt / SystemExit propagate.
         print(sys.exc_info()[0])
         traceback.print_exc()
Beispiel #2
0
    def CollectDataFromFolder(self,folder):
        """Accumulate elevation samples from every file in ``folder`` per grid cell.

        Every line of every file is split on single spaces: the first two
        fields go through ``util.ToInt`` into ``util.TW97ToLatLng`` and the
        third is read as the elevation with ``util.ToFloat``. For each of the
        ``self.levelNum`` levels the sample is binned into the cell
        ``(int(lng * scale), int(lat * scale))`` where
        ``scale = self.gridPerUnit / 2 ** level``. The per-cell sample count
        and elevation sum are handed to ``self.AddGridBatch`` once per file.

        Args:
            folder: Directory whose entries are all opened as text files.
        """
        for index, filename in enumerate(os.listdir(folder)):
            path = folder+"/"+filename
            print(str(index)+" process "+path)
            with open(path, 'r') as f:
                cells = {}
                for line in f:
                    fields = line.split(" ")
                    lat, lng = util.TW97ToLatLng(
                        util.ToInt(fields[0]), util.ToInt(fields[1]))
                    elev = util.ToFloat(fields[2])
                    for level in range(self.levelNum):
                        scale = self.gridPerUnit/math.pow(2,level)
                        gridX = int(lng*scale)
                        gridY = int(lat*scale)
                        key = "-".join((str(level), str(gridX), str(gridY)))
                        cell = cells.get(key)
                        if cell is None:
                            # First sample that falls into this cell.
                            cells[key] = {
                                "lev":level,
                                "x":gridX,
                                "y":gridY,
                                "num":1,
                                "elevSum":elev
                            }
                        else:
                            cell["num"] += 1
                            cell["elevSum"] += elev
                self.AddGridBatch(cells)
Beispiel #3
0
    def ProcessTyphoon(self, url):
        """Download typhoon track data and upsert every fix into ``typhoon``.

        The response body is parsed with BeautifulSoup. For each ``fix``
        under the ``analysis_data`` of each ``tropicalcyclone`` element one
        document is built from the cyclone's name tags (when present), the
        fix time, the coordinate and the wind, pressure and wind-circle
        readings. All documents are written in a single unordered bulk
        upsert keyed by ``_id`` (concatenated names + ``_`` + time string).

        Args:
            url: Address of the typhoon feed.

        Returns:
            None. Nothing is written unless the status code equals
            ``requests.codes.all_okay``; failures are printed with a
            traceback instead of being raised.
        """
        print("process typhoon url: %s" % url)
        nameTags = ("typhoon_name", "cwb_typhoon_name", "cwb_td_no")
        readings = ("max_wind_speed", "max_gust_speed", "pressure")
        circles = ("circle_of_15ms", "circle_of_25ms")
        try:
            r = requests.get(url)
            if r.status_code != requests.codes.all_okay:
                return

            soup = BeautifulSoup(r.text, 'html.parser')
            ops = []
            for tp in soup.find_all("tropicalcyclone"):
                for pos in tp.analysis_data.find_all("fix"):
                    data = {}
                    # Name tags are optional; copy only the ones present.
                    for tag in nameTags:
                        node = getattr(tp, tag)
                        if node is not None:
                            data[tag] = node.string

                    # Drop the colon inside the UTC offset before parsing
                    # it with %z.
                    dateStr = ''.join(pos.fix_time.string.rsplit(':', 1))
                    data["time"] = datetime.datetime.strptime(
                        dateStr, "%Y-%m-%dT%H:%M:%S%z")

                    tpID = ""
                    for tag in nameTags:
                        if tag in data:
                            tpID += data[tag]
                    data["_id"] = tpID + "_" + dateStr

                    # The coordinate text is split on "," with the second
                    # part stored as lat and the first as lng.
                    coords = pos.coordinate.string.split(",")
                    data["lat"] = util.ToFloat(coords[1])
                    data["lng"] = util.ToFloat(coords[0])
                    for tag in readings:
                        data[tag] = util.ToFloat(getattr(pos, tag).string)
                    for tag in circles:
                        data[tag] = util.ToFloat(
                            getattr(pos, tag).radius.string)

                    ops.append(
                        pymongo.UpdateOne({"_id": data["_id"]},
                                          {"$set": data},
                                          upsert=True))
            if ops:
                self.db["typhoon"].bulk_write(ops, ordered=False)
        except:
            print(sys.exc_info()[0])
            traceback.print_exc()
Beispiel #4
0
 def ProcessReservoirSiltation(self, file):
     """Load reservoir siltation rows from a CSV file into MongoDB.

     Every row becomes one document in the ``reservoirSiltation``
     collection, keyed by ``Year_ReservoirName``. A row is inserted only
     when no document with the same ``_id`` exists yet.

     Args:
         file: Path of the UTF-8 encoded CSV file to read.

     Returns:
         None. Failures are printed with a traceback instead of being
         raised.
     """
     print("process file %s" % file)
     try:
         with open(file, "r", encoding="utf8") as f:
             for row in csv.DictReader(f):
                 # Column names are spelled exactly as the code reads them
                 # from the CSV header ("Currunt", "Lastest").
                 data = {
                     "_id": row["Year"] + "_" + row["ReservoirName"],
                     "Area": row["Area"],
                     "CurruntCapacity": util.ToFloat(
                         row["CurruntCapacity"]),
                     "CurruntEffectiveCapacity": util.ToFloat(
                         row["CurruntEffectiveCapacity"]),
                     "DesignedCapacity": util.ToFloat(
                         row["DesignedCapacity"]),
                     "DesignedEffectiveCapacity": util.ToFloat(
                         row["DesignedEffectiveCapacity"]),
                     "ReservoirName": row["ReservoirName"],
                     "ReservoirSedimentationVolume": util.ToFloat(
                         row["ReservoirSedimentationVolume"]),
                     "TheLastestMeasuredTimeOfReservoirCapacity": util.ToFloat(
                         row["TheLastestMeasuredTimeOfReservoirCapacity"]),
                     "Year": util.ToInt(row["Year"]),
                 }
                 collection = self.db["reservoirSiltation"]
                 if collection.find_one(data["_id"]) is None:
                     collection.insert_one(data)
     except:
         print(sys.exc_info()[0])
         traceback.print_exc()
Beispiel #5
0
 def ProcessLiving(self, file):
     """Load domestic water-use rows from a CSV file into MongoDB.

     Rows whose ``Status`` column is not ``"2"`` are skipped. Every other
     row becomes one document in the ``waterUseLiving`` collection, keyed
     by ``Year_County``, and is inserted only when no document with the
     same ``_id`` exists yet.

     Args:
         file: Path of the UTF-8 encoded CSV file to read.

     Returns:
         None. Failures are printed with a traceback instead of being
         raised.
     """
     print("process file %s" % file)
     try:
         with open(file, "r", encoding="utf8") as f:
             for row in csv.DictReader(f):
                 if row["Status"] != "2":
                     continue
                 data = {
                     "_id": row["Year"] + "_" + row["County"],
                     "Area": row["Area"],
                     "County": row["County"],
                     "DistributedWaterQuantityPerPersonPerDay": util.ToFloat(
                         row["DistributedWaterQuantityPerPersonPerDay"]),
                     "DomesticWaterConsumptionPerPersonPerDay": util.ToFloat(
                         row["DomesticWaterConsumptionPerPersonPerDay"]),
                     # NOTE(review): the CSV column is read as
                     # "SelfIntakePapulation" but stored as
                     # "SelfIntakePopulation" - presumably the source
                     # header is misspelled; confirm against the data file.
                     "SelfIntakePopulation": util.ToFloat(
                         row["SelfIntakePapulation"]),
                     "SelfIntakeWaterConsumption": util.ToFloat(
                         row["SelfIntakeWaterConsumption"]),
                     "SelfIntakeWaterConsumptionPerPersonPerDay": util.ToFloat(
                         row["SelfIntakeWaterConsumptionPerPersonPerDay"]),
                     "Status": row["Status"],
                     "TapWaterConsumption": util.ToFloat(
                         row["TapWaterConsumption"]),
                     "TapWaterPopulation": util.ToFloat(
                         row["TapWaterPopulation"]),
                     "TotalPopulation": util.ToFloat(
                         row["TotalPopulation"]),
                     "WaterSalesPerPersonPerDay": util.ToFloat(
                         row["WaterSalesPerPersonPerDay"]),
                     "Year": util.ToInt(row["Year"]),
                 }
                 collection = self.db["waterUseLiving"]
                 if collection.find_one(data["_id"]) is None:
                     collection.insert_one(data)
     except:
         print(sys.exc_info()[0])
         traceback.print_exc()
Beispiel #6
0
    def ProcessAgriculture(self, file):
        """Load agricultural water-use rows from a CSV file into MongoDB.

        Rows whose ``Status`` column is not ``"2"`` are skipped. Every other
        row becomes one document in the ``waterUseAgriculture`` collection,
        keyed by ``Year_IrrigationAssociation``, and is inserted only when
        no document with the same ``_id`` exists yet.

        Args:
            file: Path of the UTF-8 encoded CSV file to read.

        Returns:
            None. Failures are printed with a traceback instead of being
            raised.
        """
        print("process file %s" % file)
        # Numeric columns, split around "IrrigationAssociation" so the
        # stored field order stays the same.
        firstPhase = (
            "FirstPhaseMiscellaneousIrrigationArea",
            "FirstPhaseMiscellaneousWaterConsumption",
            "FirstPhaseRiceIrrigationArea",
            "FirstPhaseRiceWaterConsumption",
        )
        secondPhase = (
            "SecondPhaseMiscellaneousIrrigationArea",
            "SecondPhaseMiscellaneousWaterConsumption",
            "SecondPhaseRiceIrrigationArea",
            "SecondPhaseRiceWaterConsumption",
        )
        try:
            with open(file, "r", encoding="utf8") as f:
                for row in csv.DictReader(f):
                    if row["Status"] != "2":
                        continue
                    data = {}
                    data["_id"] = (row["Year"] + "_" +
                                   row["IrrigationAssociation"])
                    data["Area"] = row["Area"]
                    for name in firstPhase:
                        data[name] = util.ToFloat(row[name])
                    data["IrrigationAssociation"] = row[
                        "IrrigationAssociation"]
                    for name in secondPhase:
                        data[name] = util.ToFloat(row[name])
                    data["Status"] = row["Status"]
                    data["Year"] = util.ToInt(row["Year"])

                    collection = self.db["waterUseAgriculture"]
                    if collection.find_one(data["_id"]) is None:
                        collection.insert_one(data)

        except:
            print(sys.exc_info()[0])
            traceback.print_exc()
Beispiel #7
0
    def on_message(self, client, userdata, msg):
        """Store one sensor reading received as a message.

        The UTF-8 payload is a ``|``-separated list of ``name=value``
        fields. The reading's timestamp is floored to its 10-minute slot,
        labelled as UTC and converted to the ``taiwan`` time zone. The
        document is then upserted into the ``waterbox<YYYYMMDD>`` collection
        under the key ``(device_id, time)``.

        NOTE(review): the signature matches a paho-mqtt ``on_message``
        callback - presumably it is registered as one; confirm at the
        call site.

        Args:
            client: Unused here.
            userdata: Unused here.
            msg: Message object providing ``topic`` and a bytes ``payload``.

        Returns:
            None. Failures are printed with a traceback instead of being
            raised.
        """
        print("recieve topic " + msg.topic)
        try:
            message = msg.payload.decode("utf-8")
            # Parse the message; pieces that do not contain exactly one "="
            # are ignored.
            data = {}
            for field in message.split("|"):
                parts = field.split("=")
                if len(parts) == 2:
                    data[parts[0]] = parts[1]

            # Build the document to save.
            d = {}
            d["device_id"] = data["device_id"]
            d["lat"] = util.ToFloat(data["gps_lat"])
            d["lng"] = util.ToFloat(data["gps_lon"])
            t = datetime.datetime.strptime(data["date"] + " " + data["time"],
                                           '%Y-%m-%d %H:%M:%S')
            # Floor to the 10-minute slot the reading falls into.
            t = t.replace(minute=(t.minute - t.minute % 10), second=0)
            t = t.replace(tzinfo=pytz.utc).astimezone(taiwan)
            d["time"] = t
            # Sensor fields (ranges as noted by the original author):
            #   s_t0  water temperature (-20.0~150.0 C)
            #   s_ph  pH (0.00~-14.00)
            #   s_ec  conductivity (0~200000 uS/cm)
            #   s_Tb  turbidity (0~10000 NTU)
            #   s_Lv  water level (0.000~20.000 M)
            #   s_DO  dissolved oxygen (DO 0.00~12.00 mg/L)
            #   s_orp oxidation-reduction potential (ORP -2000~2000 mV)
            for name in ("s_t0", "s_ph", "s_ec", "s_Tb", "s_Lv", "s_DO",
                         "s_orp"):
                d[name] = util.ToFloat(data[name])
            print(d)

            key = {"device_id": d["device_id"], "time": d["time"]}
            day = t.strftime("%Y%m%d")
            # Collection.update() is deprecated in PyMongo 3 and removed in
            # PyMongo 4; replace_one() is the equivalent call for a
            # whole-document upsert.
            self.db["waterbox" + day].replace_one(key, d, upsert=True)

        except Exception:
            # Best-effort handling: report the failure, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(sys.exc_info()[0])
            traceback.print_exc()
Beispiel #8
0
    def ProcessTide(self, url):
        """Download tide observations and upsert them into per-day collections.

        Reads ``cwbopendata.dataset.location`` from the JSON response. For
        every observation of every station the time is floored to its
        10-minute slot and the value is converted with ``util.ToFloat`` and
        multiplied by 0.01. Observations with an empty value are skipped.
        Documents go to ``tide<YYYYMMDD>`` under the key
        ``(stationID, time)``; each touched collection gets indexes on
        ``stationID`` and ``time``.

        Args:
            url: Address of the tide feed.

        Returns:
            None. Nothing is written unless the status code equals
            ``requests.codes.all_okay``; failures are printed with a
            traceback instead of being raised.
        """
        print("process tide url: %s" % url)
        try:
            r = requests.get(url)
            if r.status_code != requests.codes.all_okay:
                return

            opsByDay = {}
            for station in r.json()["cwbopendata"]["dataset"]["location"]:
                for obs in station["time"]:
                    # Drop the colon inside the UTC offset before parsing
                    # it with %z.
                    dateStr = ''.join(obs["obsTime"].rsplit(':', 1))
                    observed = datetime.datetime.strptime(
                        dateStr, "%Y-%m-%dT%H:%M:%S%z")
                    slot = observed.replace(
                        minute=observed.minute // 10 * 10, second=0)
                    # The day bucket is registered before the value check,
                    # so a day with only empty readings still gets indexed.
                    dayOps = opsByDay.setdefault(
                        observed.strftime('%Y%m%d'), [])

                    value = obs["weatherElement"]["elementValue"]["value"]
                    if not value:
                        continue
                    d = {
                        "stationID": station["stationId"],
                        "time": slot,
                        "value": util.ToFloat(value) * 0.01,
                    }
                    key = {"stationID": d["stationID"], "time": d["time"]}
                    dayOps.append(
                        pymongo.UpdateOne(key, {"$set": d}, upsert=True))

            for day, dayOps in opsByDay.items():
                collection = self.db["tide" + day]
                collection.create_index("stationID")
                collection.create_index("time")
                if dayOps:
                    collection.bulk_write(dayOps, ordered=False)
        except:
            print(sys.exc_info()[0])
            traceback.print_exc()
Beispiel #9
0
 def ProcessMonthWaterUse(self, file):
     """Load monthly per-person water-use rows from a CSV file into MongoDB.

     Every row becomes one document in the ``monthWaterUse`` collection,
     keyed by ``Year_Month_County_Town``. A row is inserted only when no
     document with the same ``_id`` exists yet.

     Args:
         file: Path of the UTF-8 encoded CSV file to read.

     Returns:
         None. Failures are printed with a traceback instead of being
         raised.
     """
     print("process file %s" % file)
     try:
         with open(file, "r", encoding="utf8") as f:
             for row in csv.DictReader(f):
                 data = {
                     "_id": (row["Year"] + "_" + row["Month"] + "_" +
                             row["County"] + "_" + row["Town"]),
                     "County": row["County"],
                     "Month": util.ToInt(row["Month"]),
                     "TheDailyDomesticConsumptionOfWaterPerPerson": util.ToFloat(
                         row["TheDailyDomesticConsumptionOfWaterPerPerson"]),
                     "Town": row["Town"],
                     "Year": util.ToInt(row["Year"]),
                 }
                 collection = self.db["monthWaterUse"]
                 if collection.find_one(data["_id"]) is None:
                     collection.insert_one(data)
     except:
         print(sys.exc_info()[0])
         traceback.print_exc()
Beispiel #10
0
    def ProcessRain(self, url):
        """Download rain observations and store stations, readings and sums.

        The XML response is parsed with ``ET.fromstring``. For every
        ``location`` element a station record and a reading (``hour12``,
        ``hour24``, ``now``) are collected; locations whose ``now`` value is
        NaN or negative are dropped. Stations are upserted into
        ``rainStation``. Readings not yet present in ``rain<YYYYMMDD>`` are
        upserted there, added to the per-area ``$inc`` sums in
        ``rainDailySum`` and ``rain10minSum``, and passed on to
        ``self.grid.AddGridBatch``.

        Args:
            url: Address of the rain feed.

        Returns:
            None. Nothing is written unless the status code equals
            ``requests.codes.all_okay``; failures are printed with a
            traceback instead of being raised.
        """
        print("process rain url: %s" % url)
        try:
            r = requests.get(url)
            #r.encoding = "utf-8"
            if r.status_code == requests.codes.all_okay:
                root = ET.fromstring(r.text)
                # Take everything up to and including "}" of the root tag
                # as the namespace prefix for all element lookups below.
                pos = root.tag.find("}")
                ns = root.tag[:pos + 1]

                stationArr = []
                locHash = {}
                dataArr = []
                for location in root.findall(ns + 'location'):
                    data = {}
                    station = {}
                    dateStr = location.find(ns + "time").find(ns +
                                                              "obsTime").text
                    dateStr = ''.join(dateStr.rsplit(':', 1))  # drop the colon inside the UTC offset
                    dateObj = datetime.datetime.strptime(
                        dateStr, "%Y-%m-%dT%H:%M:%S%z")
                    # Rainfall accumulates from 00:10 until 00:00 of the next
                    # day and the raw data stamps each record with the end of
                    # its accumulation window; subtract 10 minutes so the
                    # record is stamped with the start of the window instead.
                    dateObj = dateObj - datetime.timedelta(minutes=10)
                    data["time"] = dateObj
                    sID = location.find(ns + "stationId").text
                    sName = location.find(ns + "locationName").text
                    lat = util.ToFloat(location.find(ns + "lat").text)
                    lon = util.ToFloat(location.find(ns + "lon").text)
                    data["stationID"] = sID
                    station["stationID"] = sID
                    station["name"] = sName
                    station["lat"] = lat
                    station["lon"] = lon
                    # Remembered for every location, including the ones
                    # skipped below.
                    locHash[sID] = {"lat": lat, "lon": lon}

                    # The first child of a weatherElement holds its name and
                    # the first child of its second child holds the value.
                    for elem in location.findall(ns + "weatherElement"):
                        if (elem[0].text == "HOUR_12"):
                            data["hour12"] = util.ToFloat(elem[1][0].text)
                        elif (elem[0].text == "HOUR_24"):
                            data["hour24"] = util.ToFloat(elem[1][0].text)
                        elif (elem[0].text == "NOW"):
                            data["now"] = util.ToFloat(elem[1][0].text)
                    # NOTE(review): data["now"] is read unconditionally, so
                    # a location without a NOW element raises KeyError and
                    # the outer except abandons the whole run - confirm the
                    # feed always provides it.
                    if math.isnan(data["now"]) or data["now"] < 0:
                        continue

                    # The first child of a parameter holds its name and the
                    # second child its value.
                    for param in location.findall(ns + "parameter"):
                        if (param[0].text == "CITY"):
                            station["city"] = param[1].text
                        elif (param[0].text == "TOWN"):
                            station["town"] = param[1].text

                    dataArr.append(data)
                    stationArr.append(station)

                #print(dataArr)
                #print(stationArr)
                # Upsert every collected station in one unordered bulk write.
                opSite = []
                for s in stationArr:
                    key = {"stationID": s["stationID"]}
                    #self.db["rainStation"].update(key,s,upsert=True)
                    opSite.append((pymongo.UpdateOne(key, {"$set": s},
                                                     upsert=True)))
                if len(opSite) > 0:
                    self.db["rainStation"].bulk_write(opSite, ordered=False)

                opData = {}  # day string -> upserts for "rain<day>"
                opDaily = []  # $inc upserts for "rainDailySum"
                op10min = []  # $inc upserts for "rain10minSum"
                gridArr = {}  # day string -> readings for the rain grid
                for d in dataArr:
                    dayStr = d["time"].strftime('%Y%m%d')
                    if dayStr not in opData:
                        opData[dayStr] = []
                    if dayStr not in gridArr:
                        gridArr[dayStr] = []

                    key = {"stationID": d["stationID"], "time": d["time"]}
                    # Only readings that are not stored yet are processed -
                    # presumably so that a repeated download does not add the
                    # same reading to the $inc sums twice.
                    query = self.db["rain" + dayStr].find_one(key)
                    if query is None:
                        #self.db["rain"+dayStr].insert_one(d)
                        opData[dayStr].append(
                            pymongo.UpdateOne(key, {"$set": d}, upsert=True))
                        # Add the reading to the "<area>Sum" / "<area>Num"
                        # counters, where the area name comes from
                        # util.LatToArea(latitude).
                        inc = {}
                        loc = locHash[d["stationID"]]
                        area = util.LatToArea(loc["lat"])
                        inc[area + "Sum"] = d["now"]
                        inc[area + "Num"] = 1
                        tday = d["time"].replace(hour=0, minute=0, second=0)
                        t10min = d["time"].replace(
                            minute=(d["time"].minute - d["time"].minute % 10),
                            second=0)
                        #self.db["rainDailySum"].update({"time":tday},{"$inc":inc},upsert=True)
                        #self.db["rain10minSum"].update({"time":t10min},{"$inc":inc},upsert=True)
                        opDaily.append(
                            pymongo.UpdateOne({"time": tday}, {"$inc": inc},
                                              upsert=True))
                        op10min.append(
                            pymongo.UpdateOne({"time": t10min}, {"$inc": inc},
                                              upsert=True))
                        #self.grid.AddGridRain(d)
                        # The grid batch gets a copy of the reading extended
                        # with the station position.
                        grid = d.copy()
                        grid["lat"] = loc["lat"]
                        grid["lon"] = loc["lon"]
                        gridArr[dayStr].append(grid)

                # Index every touched day collection, then write its
                # readings.
                for key in opData:
                    self.db["rain" + key].create_index("stationID")
                    self.db["rain" + key].create_index("time")
                    if len(opData[key]) > 0:
                        self.db["rain" + key].bulk_write(opData[key],
                                                         ordered=False)

                if len(opDaily) > 0:
                    self.db["rainDailySum"].bulk_write(opDaily, ordered=False)
                if len(op10min) > 0:
                    self.db["rain10minSum"].bulk_write(op10min, ordered=False)

                for key in gridArr:
                    self.grid.AddGridBatch("rainGrid" + key, gridArr[key],
                                           "time", ["now"], "lat", "lon")
        except:
            print(sys.exc_info()[0])
            traceback.print_exc()
Beispiel #11
0
    def ProcessWind(self, url):
        """Download weather-station observations and store stations and readings.

        Reads ``cwbopendata.location`` from the JSON response. Every
        location yields a station record (upserted into ``windStation`` by
        ``stationID``) and a reading holding whichever of the ``ELEV``,
        ``WDIR``, ``WDSD``, ``TEMP``, ``HUMD`` and ``PRES`` elements are
        present. Readings are upserted into ``wind<YYYYMMDD>`` under the key
        ``(stationID, time)``; each touched collection gets indexes on
        ``stationID`` and ``time``.

        Args:
            url: Address of the observation feed.

        Returns:
            None. Nothing is written unless the status code equals
            ``requests.codes.all_okay``; failures are printed with a
            traceback instead of being raised.
        """
        print("process wind url: %s" % url)
        # Weather elements copied into the reading under their own name.
        measured = ("ELEV", "WDIR", "WDSD", "TEMP", "HUMD", "PRES")
        try:
            r = requests.get(url)
            if r.status_code != requests.codes.all_okay:
                return

            stationArr = []
            dataArr = []
            for loc in r.json()["cwbopendata"]["location"]:
                station = {}
                station["stationID"] = loc["stationId"]
                station["name"] = loc["locationName"]
                station["lat"] = util.ToFloat(loc["lat"])
                station["lon"] = util.ToFloat(loc["lon"])
                for param in loc["parameter"]:
                    if param["parameterName"] == "CITY":
                        station["city"] = param["parameterValue"]
                    elif param["parameterName"] == "TOWN":
                        station["town"] = param["parameterValue"]
                stationArr.append(station)

                data = {}
                # Every colon is removed (time of day and UTC offset), which
                # is why the format below has no colons either.
                t = loc["time"]["obsTime"].replace(":", "")
                data["time"] = datetime.datetime.strptime(
                    t, '%Y-%m-%dT%H%M%S%z')
                data["stationID"] = station["stationID"]
                for elem in loc["weatherElement"]:
                    name = elem["elementName"]
                    if name in measured:
                        data[name] = util.ToFloat(
                            elem["elementValue"]["value"])
                dataArr.append(data)

            opSite = []
            for s in stationArr:
                key = {"stationID": s["stationID"]}
                opSite.append(
                    pymongo.UpdateOne(key, {"$set": s}, upsert=True))
            if opSite:
                self.db["windStation"].bulk_write(opSite, ordered=False)

            # Group the reading upserts by the day collection they go to.
            opData = {}
            for d in dataArr:
                dayStr = d["time"].strftime('%Y%m%d')
                key = {"stationID": d["stationID"], "time": d["time"]}
                opData.setdefault(dayStr, []).append(
                    pymongo.UpdateOne(key, {"$set": d}, upsert=True))

            for dayStr, dayOps in opData.items():
                collection = self.db["wind" + dayStr]
                collection.create_index("stationID")
                collection.create_index("time")
                if dayOps:
                    collection.bulk_write(dayOps, ordered=False)

        except Exception:
            # Best-effort import: report the failure and return, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(sys.exc_info()[0])
            traceback.print_exc()
Beispiel #12
0
 def ProcessReservoirUse(self, file):
     """Load reservoir usage rows from a CSV file into MongoDB.

     Every row becomes one document in the ``reservoirUse`` collection,
     keyed by ``Year_ReservoirName``. A row is inserted only when no
     document with the same ``_id`` exists yet.

     Args:
         file: Path of the UTF-8 encoded CSV file to read.

     Returns:
         None. Failures are printed with a traceback instead of being
         raised.
     """
     print("process file %s" % file)
     # Numeric columns stored ahead of "ReservoirName", in stored order.
     volumes = (
         "BackWaterVolumeOfPowerGeneration",
         "DischargeWaterVolumeOfPowerGeneration",
         "EndYearStoragedWater",
         "EndYearWaterLevel",
         "FlushingVolume",
         "GrossVolumeOfWaterConsumptionForAgricultureWater",
         "GrossVolumeOfWaterConsumptionForAllPurposes",
         "GrossVolumeOfWaterConsumptionForDomesticWater",
         "GrossVolumeOfWaterConsumptionForIndustrialWater",
         "InflowVolume",
         "InitialStorageWater",
         "LeakageVolume",
         "OthersDischargeVolume",
     )
     try:
         with open(file, "r", encoding="utf8") as f:
             for row in csv.DictReader(f):
                 data = {}
                 data["_id"] = row["Year"] + "_" + row["ReservoirName"]
                 data["Area"] = row["Area"]
                 for name in volumes:
                     data[name] = util.ToFloat(row[name])
                 data["ReservoirName"] = row["ReservoirName"]
                 data["SedimentationVariation"] = util.ToFloat(
                     row["SedimentationVariation"])
                 data["Year"] = util.ToInt(row["Year"])

                 collection = self.db["reservoirUse"]
                 if collection.find_one(data["_id"]) is None:
                     collection.insert_one(data)
     except:
         print(sys.exc_info()[0])
         traceback.print_exc()
Beispiel #13
0
    def ProcessOverview(self, file):
        """Load the yearly water supply/use overview from a CSV file.

        The file is read positionally with ``csv.reader``. A row is used
        only when its first cell converts to a number (``util.ToFloat``
        does not return NaN) and its third cell is not ``"0"``. The second
        cell without its last character is the ``_id`` and, converted with
        ``util.ToInt``, the ``Year``. The numeric cells have their thousands
        separators removed before conversion. A row is inserted into
        ``waterUseOverview`` only when no document with the same ``_id``
        exists yet.

        Args:
            file: Path of the UTF-8 encoded CSV file to read.

        Returns:
            None. Failures are printed with a traceback instead of being
            raised.
        """
        print("process file %s" % file)
        # (document field, CSV column index), in stored order.
        columns = (
            ("TotalWaterSupply", 2),
            ("WaterSupplyRiver", 4),
            ("WaterSupplyReservoir", 5),
            ("WaterSupplyUnderGround", 6),
            ("TotalWaterUse", 7),
            ("WaterUseAgriculture", 9),
            ("WaterUseLivestock", 10),
            ("WaterUseCultivation", 11),
            ("WaterUseLiving", 12),
            ("WaterUseIndustry", 13),
        )
        try:
            # Open the file once; the nested second open of the same path
            # only shadowed the first handle.
            with open(file, "r", encoding="utf8") as f:
                for row in csv.reader(f):
                    if not row:
                        # csv.reader yields [] for a blank line; skip it
                        # instead of failing on row[0] and losing the rest
                        # of the file.
                        continue
                    serial = util.ToFloat(row[0])
                    if math.isnan(serial):
                        continue
                    if row[2] == "0":
                        continue
                    data = {}
                    # NOTE(review): the last character of the year cell is
                    # dropped - presumably a unit suffix; confirm against
                    # the data file.
                    data["_id"] = row[1][0:-1]
                    data["Year"] = util.ToInt(data["_id"])
                    for name, index in columns:
                        data[name] = util.ToFloat(
                            row[index].replace(",", ""))

                    collection = self.db["waterUseOverview"]
                    if collection.find_one(data["_id"]) is None:
                        collection.insert_one(data)

        except Exception:
            # Best-effort import: report the failure and return, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(sys.exc_info()[0])
            traceback.print_exc()