def standardizing():
    '''
    Clean the merged weather/air-quality CSV and export its April rows.

    Reads ``MergedDataSecond.csv``, rebuilds each row's timestamp as
    ``YYYY-MM-DD HH:MM:00`` and keeps only the rows whose upper-cased
    ``area`` is not ``'Y'`` and whose month is April. Every kept row is
    printed, and the collected rows are handed to ``writeData`` under the
    name ``清洗完毕天气与空气质量总表``.

    The file is opened as ``utf-8-sig`` so a leading byte-order mark is
    stripped from the first header. The time column is therefore read as
    ``年月日时分`` whether or not the file starts with a BOM, instead of
    relying on a BOM-prefixed key.

    :return: None
    :raises KeyError: if an expected column is missing from a row.
    :raises ValueError: if a rebuilt timestamp does not parse.
    '''
    datapath = 'MergedDataSecond.csv'
    resultList = []
    with open(datapath, 'r', encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            timeUnStandard = row['年月日时分']
            # Slices used: date in [0:10], index 10 skipped, two hour
            # characters in [11:13], everything after that is the minutes.
            dataString = timeUnStandard[0:10]
            hourString = timeUnStandard[11:13]
            minuteString = timeUnStandard[13:]
            timeStandard = dataString + ' ' + hourString + ':' + minuteString + ':00'
            # Parsed for every row (before filtering) so malformed timestamps
            # still surface as errors, exactly as before.
            timeDatetime = datetime.strptime(timeStandard, '%Y-%m-%d %H:%M:%S')
            area = row['area'].upper()
            if area == 'Y' or timeDatetime.month != 4:
                continue
            dictTemp = {
                '时间': timeStandard,
                '区域': area,
                '气温': row['气温'],
                '风向': row['风向'],
                '风速': row['风速'],
                '雨量': row['雨量'],
                'AQI': row['AQI'],
                '空气质量': row['AIRQUALITY'],
            }
            resultList.append(dictTemp)
            print(dictTemp)
    headers = ['时间', '区域', '气温', '风向', '风速', '雨量', 'AQI', '空气质量']
    filename = '清洗完毕天气与空气质量总表'
    writeData(resultList, headers=headers, filename=filename)
def _loadAirIndex(path):
    '''
    Build a ``{(time, area): row}`` lookup from the weather/air CSV at *path*.

    Rows whose ``时间`` field is the literal text ``时间`` are skipped. When
    several rows share the same time and area, the first one is kept.
    '''
    index = {}
    with open(path, 'r', encoding='utf-8') as airFile:
        for airDataRow in csv.DictReader(airFile):
            if airDataRow['时间'] == '时间':
                continue
            airTimeTemp = datetime.strptime(airDataRow['时间'], '%Y-%m-%d %H:%M:%S')
            # setdefault keeps the earliest row, matching a scan that stops
            # at the first hit.
            index.setdefault((airTimeTemp, airDataRow['区域']), airDataRow)
    return index


def merge():
    '''
    Join the bus/taxi table with the cleaned weather/air-quality table.

    Every row of ``公交出租总表.csv`` is matched against
    ``清洗完毕天气与空气质量总表.csv`` on identical timestamp and area.
    Matched rows are extended with the weather/air columns, printed and
    collected; unmatched rows are dropped after printing a blank line and
    ``***``. The result is passed to ``writeData`` as ``汇合的表格``.

    The weather/air file is parsed once into a dictionary on the first
    traffic row instead of being re-read for each traffic row, and the
    traffic file is closed by a ``with`` block.

    :return: None
    :raises KeyError: if an expected column is missing from a row.
    :raises ValueError: if a timestamp does not parse.
    '''
    trafficDataPath = '公交出租总表.csv'
    AirAndPollutionDataPath = '清洗完毕天气与空气质量总表.csv'
    airFields = ['气温', '风向', '风速', '雨量', 'AQI', '空气质量']
    dataList = []
    airIndex = None  # loaded on the first traffic row only
    with open(trafficDataPath, 'r', encoding='utf-8') as trafficFile:
        for trafficDataRow in csv.DictReader(trafficFile):
            trafficTimeTemp = datetime.strptime(trafficDataRow['时间'], '%Y-%m-%d %H:%M:%S')
            trafficArea = trafficDataRow['区域']
            mergeDictTemp = {
                '时间': trafficTimeTemp,
                '区域': trafficArea,
                '出租车数量': trafficDataRow['出租车数量'],
                '公交车数量': trafficDataRow['公交车数量'],
            }
            if airIndex is None:
                airIndex = _loadAirIndex(AirAndPollutionDataPath)
            airDataRow = airIndex.get((trafficTimeTemp, trafficArea))
            if airDataRow is None:
                # No weather/air record for this time and area.
                print()
                print('***')
                continue
            print(trafficDataRow)
            for field in airFields:
                mergeDictTemp[field] = airDataRow[field]
            print(mergeDictTemp)
            dataList.append(mergeDictTemp)
    headers = [
        '时间', '区域', '出租车数量', '公交车数量', '气温', '风向', '风速',
        '雨量', 'AQI', '空气质量'
    ]
    fileName = '汇合的表格'
    writeData(dataList, headers, fileName)
def deleteDateOfB():
    '''
    Remove the area-B rows from the bus/taxi table.

    Reads ``公交与出租.csv``, keeps every row whose ``区域`` value is not
    ``'B'`` and passes the remaining rows to ``writeData`` under the name
    ``公交出租总表``.

    :return: None
    '''
    sourcePath = '公交与出租.csv'
    with open(sourcePath, 'r', encoding='utf-8') as source:
        keptRows = [record for record in csv.DictReader(source) if record['区域'] != 'B']
    columnNames = ['时间', '区域', '出租车数量', '公交车数量']
    outputName = '公交出租总表'
    writeData(keptRows, columnNames, outputName)
def getBusTimeAndCount(area, timeHeader):
    '''
    Count April bus records per standardised time slot for one area.

    Reads ``公交数据/<area>.csv``, parses the column named *timeHeader* as
    ``%Y-%m-%d %H:%M:%S`` and ignores every row outside April. Each
    remaining timestamp is mapped to a slot by ``standardTime``, and the
    number of rows per slot is accumulated. The result is written through
    ``writeData`` as ``公交车数量<area>区域数据``.

    :param area: area code; used in the input path, the output name and
        the ``区域`` field of each output row.
    :param timeHeader: name of the CSV column holding the timestamp.
    :return: None
    :raises KeyError: if *timeHeader* is not a column of the file.
    :raises ValueError: if a timestamp does not parse.
    '''
    # Local import so the block does not depend on the file's import list.
    from datetime import timedelta

    dataPath = '公交数据/' + area + '.csv'
    print('正在打开...', dataPath)
    # The all-zero first row is part of the existing output format and is
    # kept as is.
    busCountList = [{'时间': 0, '区域': 0, '数量': 0}]
    # Slot -> entry of busCountList. The area is constant within one call,
    # so the slot alone identifies an entry.
    # NOTE(review): assumes standardTime returns a hashable value
    # (presumably a datetime) - confirm against its definition.
    entryByTime = {}
    with open(dataPath, 'r', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            # Actual timestamp of the record.
            busRealTime = datetime.strptime(row[timeHeader], '%Y-%m-%d %H:%M:%S')
            if busRealTime.month != 4:  # April only
                continue
            busStandardTime = standardTime(busRealTime)
            if busStandardTime is None:
                # A None slot is replaced by midnight of the following day.
                # timedelta handles month and year ends, replacing the
                # always-true "month == 4 or 6 or 9 or 11" test and the
                # manual "day + 1".
                midnight = datetime(busRealTime.year, busRealTime.month, busRealTime.day)
                busStandardTime = midnight + timedelta(days=1)
            busDict = {'区域': area, '时间': busStandardTime}
            busCount = entryByTime.get(busStandardTime)
            if busCount is not None:
                print('区域与时间相等')
                print('本次循环的出租车信息(taxiDict):', busDict)
                print('List中对应的出租车信息(taxiCount):', busCount)
                busCount['数量'] += 1
            else:
                busDict['数量'] = 1
                busCountList.append(busDict)
                entryByTime[busStandardTime] = busDict
    dataHeaders = ['时间', '区域', '数量']
    fileName = '公交车数量' + area + '区域数据'
    writeData(busCountList, dataHeaders, fileName)
def _loadBusCounts(path):
    '''
    Build a ``{time: count}`` lookup from the per-area bus-count CSV at *path*.

    Rows whose ``时间`` field is ``'0'`` or the literal text ``时间`` are
    skipped. When a time occurs more than once, the last row wins.
    '''
    counts = {}
    with open(path, 'r', encoding='utf-8') as busFile:
        for busRow in csv.DictReader(busFile):
            if busRow['时间'] in ('0', '时间'):
                continue
            busTime = datetime.strptime(busRow['时间'], '%Y-%m-%d %H:%M:%S')
            # Plain assignment overwrites, matching a full scan with no
            # early exit.
            counts[busTime] = busRow['数量']
    return counts


def mergeData():
    '''
    Combine the taxi counts with the per-area bus counts.

    For every row of ``出租车数量总表.csv`` the time (``%Y/%m/%d %H:%M``
    with ``:00`` appended), area and taxi count are copied. For every area
    except ``'B'`` the bus count for the same time is looked up in
    ``公交车数量数据/公交车数量<area>区域数据.csv``. When none is found,
    the count is set to 0 and the row is printed before and after. Rows of
    area ``'B'`` get no ``公交车数量`` key. All rows are passed to
    ``writeData`` as ``公交与出租``.

    Each area's bus file is parsed once on first use instead of once per
    taxi row, and the taxi file is closed by a ``with`` block.

    :return: None
    :raises KeyError: if an expected column is missing from a row.
    :raises ValueError: if a timestamp does not parse.
    '''
    taxiDataPath = '出租车数量总表.csv'
    mergedList = []
    busCountsByArea = {}  # area -> {time: count}, filled lazily
    with open(taxiDataPath, 'r', encoding='utf-8') as taxiFile:
        for taxiRow in csv.DictReader(taxiFile):
            # Always store time, area and taxi count.
            areaTemp = taxiRow['区域']
            # Convert to a datetime object.
            timeTemp = datetime.strptime(taxiRow['时间'] + ':00', '%Y/%m/%d %H:%M:%S')
            mergedDictTemp = {
                '时间': timeTemp,
                '区域': areaTemp,
                '出租车数量': taxiRow['数量'],
            }
            # Bus counts are looked up for every area except B.
            if areaTemp != 'B':
                if areaTemp not in busCountsByArea:
                    busDataPath = '公交车数量数据/公交车数量' + areaTemp + '区域数据.csv'
                    busCountsByArea[areaTemp] = _loadBusCounts(busDataPath)
                busCounts = busCountsByArea[areaTemp]
                if timeTemp in busCounts:
                    mergedDictTemp['公交车数量'] = busCounts[timeTemp]
                else:
                    print('公交车数据不全,现在的数据状态为:::', mergedDictTemp)
                    mergedDictTemp['公交车数量'] = 0
                    print('之后的状态', mergedDictTemp)
            mergedList.append(mergedDictTemp)
    dataHeaders = ['时间', '区域', '出租车数量', '公交车数量']
    fileName = '公交与出租'
    writeData(mergedList, dataHeaders, fileName)