def standardizing():
    """Normalize the merged weather / air-quality table and keep April rows.

    Reads ``MergedDataSecond.csv`` and rewrites each row's ``年月日时分``
    value into ``YYYY-MM-DD HH:MM:00``: characters 0-9 are taken as the date,
    character 10 is skipped as a separator, 11-12 are the hour and the
    remainder is the minute.  Rows are kept when the upper-cased ``area`` is
    not ``'Y'`` and the parsed month is April.  The kept rows are handed to
    ``writeData`` under the name ``清洗完毕天气与空气质量总表``.

    Raises:
        KeyError: if an expected column is missing from the input.
        ValueError: if a rebuilt timestamp does not parse.
    """
    datapath = 'MergedDataSecond.csv'
    resultList = []
    # 'utf-8-sig' drops a leading byte-order mark when one is present and is
    # a no-op otherwise, so the first header is always the plain column name
    # instead of '\ufeff年月日时分' (which only matched BOM-prefixed files).
    with open(datapath, 'r', encoding='utf-8-sig') as f:
        data_csv = csv.DictReader(f)
        for row in data_csv:
            dictTemp = {}

            timeUnStandard = row['年月日时分']
            dataString = timeUnStandard[0:10]
            hourString = timeUnStandard[11:13]
            minuteString = timeUnStandard[13:]
            timeStandard = (dataString + ' ' + hourString + ':'
                            + minuteString + ':' + '00')
            dictTemp['时间'] = timeStandard
            # Parsed for every row (even ones filtered out below), so a
            # malformed timestamp anywhere in the file is reported.
            timeDatetime = datetime.strptime(timeStandard, '%Y-%m-%d %H:%M:%S')

            area = row['area'].upper()
            if area != 'Y' and timeDatetime.month == 4:
                dictTemp['区域'] = area
                dictTemp['气温'] = row['气温']
                dictTemp['风向'] = row['风向']
                dictTemp['风速'] = row['风速']
                dictTemp['雨量'] = row['雨量']
                dictTemp['AQI'] = row['AQI']
                dictTemp['空气质量'] = row['AIRQUALITY']
                resultList.append(dictTemp)
                print(dictTemp)
    headers = ['时间', '区域', '气温', '风向', '风速', '雨量', 'AQI', '空气质量']
    filename = '清洗完毕天气与空气质量总表'
    writeData(resultList, headers=headers, filename=filename)
Beispiel #2
0
def _loadAirIndex(airDataPath):
    """Index the cleaned weather/air CSV by ``(datetime, area)``.

    Rows whose ``时间`` cell is the literal header text are skipped.  When a
    key occurs more than once the first row is kept, which mirrors a
    top-to-bottom scan that stops at the first match.
    """
    airIndex = {}
    with open(airDataPath, 'r', encoding='utf-8') as airFile:
        for airDataRow in csv.DictReader(airFile):
            if airDataRow['时间'] == '时间':
                continue
            airTimeTemp = datetime.strptime(airDataRow['时间'],
                                            '%Y-%m-%d %H:%M:%S')
            airIndex.setdefault((airTimeTemp, airDataRow['区域']), airDataRow)
    return airIndex


def merge():
    """Join the bus/taxi table with the weather/air table on time and area.

    Every row of ``公交出租总表.csv`` is matched against
    ``清洗完毕天气与空气质量总表.csv`` by identical parsed timestamp and
    identical ``区域``.  Matched rows, carrying the traffic counts plus the
    six weather/air columns, are passed to ``writeData`` as ``汇合的表格``;
    unmatched traffic rows are dropped.

    The weather file is read once into a lookup table instead of being
    re-opened and re-scanned for every traffic row, and both input files are
    closed deterministically.  NOTE: the weather file is now read even when
    the traffic file has no data rows.
    """
    trafficDataPath = '公交出租总表.csv'
    AirAndPollutionDataPath = '清洗完毕天气与空气质量总表.csv'
    airFields = ('气温', '风向', '风速', '雨量', 'AQI', '空气质量')

    dataList = []
    with open(trafficDataPath, 'r', encoding='utf-8') as trafficFile:
        airIndex = _loadAirIndex(AirAndPollutionDataPath)
        for trafficDataRow in csv.DictReader(trafficFile):
            trafficTimeTemp = datetime.strptime(trafficDataRow['时间'],
                                                '%Y-%m-%d %H:%M:%S')
            trafficArea = trafficDataRow['区域']
            mergeDictTemp = {
                '时间': trafficTimeTemp,
                '区域': trafficArea,
                '出租车数量': trafficDataRow['出租车数量'],
                '公交车数量': trafficDataRow['公交车数量'],
            }

            airDataRow = airIndex.get((trafficTimeTemp, trafficArea))
            if airDataRow is None:
                # No weather record for this slot: the row is not emitted.
                print()
            else:
                print(trafficDataRow)
                for field in airFields:
                    mergeDictTemp[field] = airDataRow[field]
                print(mergeDictTemp)
                dataList.append(mergeDictTemp)
            print('***')
    headers = [
        '时间', '区域', '出租车数量', '公交车数量', '气温', '风向', '风速', '雨量', 'AQI', '空气质量'
    ]
    fileName = '汇合的表格'
    writeData(dataList, headers, fileName)
Beispiel #3
0
def deleteDateOfB():
    """Remove the area-B rows from the combined bus/taxi table.

    Reads ``公交与出租.csv``, keeps every row whose ``区域`` value is not
    ``'B'`` and passes the survivors to ``writeData`` under the name
    ``公交出租总表``.

    :return: None
    """
    sourcePath = '公交与出租.csv'
    with open(sourcePath, 'r', encoding='utf-8') as handle:
        keptRows = [
            record for record in csv.DictReader(handle)
            if not record['区域'] == 'B'
        ]
    writeData(keptRows, ['时间', '区域', '出租车数量', '公交车数量'], '公交出租总表')
Beispiel #4
0
def getBusTimeAndCount(area, timeHeader):
    """Count April bus records per standardized time slot for one area.

    Reads ``公交数据/<area>.csv``, parses the ``timeHeader`` column as
    ``%Y-%m-%d %H:%M:%S`` and, for rows dated in April, maps the timestamp
    through ``standardTime``.  Rows sharing a slot are tallied, and the
    tallies are passed to ``writeData`` as ``公交车数量<area>区域数据``.

    Args:
        area: Area code; used both in the input path and in each output row.
        timeHeader: Name of the CSV column holding the raw timestamp.

    Raises:
        KeyError: if ``timeHeader`` is not a column of the input file.
        ValueError: if a timestamp does not match the expected format.
    """
    # Imported locally so the block does not rely on the file's import list.
    from datetime import timedelta

    dataPath = '公交数据/' + area + '.csv'
    print('正在打开...', dataPath)
    # The all-zero placeholder row is kept so the written table is unchanged;
    # mergeData skips rows whose 时间 is '0'.
    busCountList = [{'时间': 0, '区域': 0, '数量': 0}]
    # Slot -> tally dict already stored in busCountList.  The area is the
    # same for every row, so the slot alone identifies a tally.
    countsByTime = {}
    with open(dataPath, 'r', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            busRealTime = datetime.strptime(row[timeHeader],
                                            '%Y-%m-%d %H:%M:%S')
            if busRealTime.month != 4:  # only April is counted
                continue

            busStandardTime = standardTime(busRealTime)
            if busStandardTime is None:
                # NOTE(review): standardTime presumably returns None when the
                # slot falls on the following day - confirm.  Roll over to
                # the next midnight; timedelta handles the month boundary
                # that the old `month == 4 or 6 or 9 or 11` test (always
                # true) only covered by accident.
                busStandardTime = datetime(
                    year=busRealTime.year,
                    month=busRealTime.month,
                    day=busRealTime.day) + timedelta(days=1)
            busDict = {'区域': area, '时间': busStandardTime}

            busCount = countsByTime.get(busStandardTime)
            if busCount is None:
                busDict['数量'] = 1
                busCountList.append(busDict)
                countsByTime[busStandardTime] = busDict
            else:
                print('区域与时间相等')
                print('本次循环的出租车信息(taxiDict):', busDict)
                print('List中对应的出租车信息(taxiCount):', busCount)
                busCount['数量'] += 1

    dataHeaders = ['时间', '区域', '数量']
    fileName = '公交车数量' + area + '区域数据'
    writeData(busCountList, dataHeaders, fileName)
Beispiel #5
0
def _loadBusCounts(busDataPath):
    """Return ``{slot datetime: 数量}`` for one area's bus-count CSV.

    Rows whose ``时间`` is ``'0'`` (placeholder) or ``'时间'`` (repeated
    header) are ignored.  When a slot occurs more than once the last row
    wins.
    """
    busCounts = {}
    with open(busDataPath, 'r', encoding='utf-8') as busFile:
        for busRow in csv.DictReader(busFile):
            if busRow['时间'] != '0' and busRow['时间'] != '时间':
                busTime = datetime.strptime(busRow['时间'],
                                            '%Y-%m-%d %H:%M:%S')
                busCounts[busTime] = busRow['数量']
    return busCounts


def mergeData():
    """Combine the taxi totals with the per-area bus counts.

    For every row of ``出租车数量总表.csv`` the timestamp (``%Y/%m/%d %H:%M``
    with ``:00`` appended) is looked up in
    ``公交车数量数据/公交车数量<area>区域数据.csv``.  The bus count is copied
    when found and set to ``0`` otherwise.  Rows of area ``'B'`` get no
    ``公交车数量`` key at all.  The result is passed to ``writeData`` as
    ``公交与出租``.

    Each area's bus file is parsed once and cached instead of being re-read
    for every taxi row, and the taxi file is closed when reading finishes.
    """
    taxiDataPath = '出租车数量总表.csv'
    mergedList = []
    busCountsByArea = {}  # area -> {slot datetime: count}, filled lazily
    with open(taxiDataPath, 'r', encoding='utf-8') as taxiFile:
        for taxiRow in csv.DictReader(taxiFile):
            mergedDictTemp = {}
            areaTemp = taxiRow['区域']
            # The taxi table stores minutes only; add seconds before parsing.
            timeTemp = datetime.strptime(taxiRow['时间'] + ':00',
                                         '%Y/%m/%d %H:%M:%S')
            mergedDictTemp['时间'] = timeTemp
            mergedDictTemp['区域'] = areaTemp
            mergedDictTemp['出租车数量'] = taxiRow['数量']

            if areaTemp != 'B':
                if areaTemp not in busCountsByArea:
                    busDataPath = ('公交车数量数据/公交车数量' + areaTemp
                                   + '区域数据.csv')
                    busCountsByArea[areaTemp] = _loadBusCounts(busDataPath)
                busCounts = busCountsByArea[areaTemp]
                if timeTemp in busCounts:
                    mergedDictTemp['公交车数量'] = busCounts[timeTemp]
                else:
                    print('公交车数据不全,现在的数据状态为:::', mergedDictTemp)
                    mergedDictTemp['公交车数量'] = 0
                    print('之后的状态', mergedDictTemp)
            mergedList.append(mergedDictTemp)
    dataHeaders = ['时间', '区域', '出租车数量', '公交车数量']
    fileName = '公交与出租'
    writeData(mergedList, dataHeaders, fileName)