def _parse_shop_line(line, column_count):
    """Split one stripped data line of the shop CSV into its fields.

    Two line shapes are accepted:

    * a plain line holding exactly ``column_count - 1`` commas, which is
      split on every comma;
    * a line holding exactly one pair of double quotes, in which case the
      quoted field (which may itself contain commas) is replaced by the
      placeholder string ``"NULL"``.

    Args:
        line: The data line with surrounding whitespace already stripped.
        column_count: Number of columns declared by the header row.

    Returns:
        The list of field strings, or ``None`` when the line matches
        neither shape or does not yield ``column_count`` fields.
    """
    if line.count(',') == column_count - 1:
        return line.split(',')
    if line.count('"') != 2:
        return None

    before, _quoted, after = line.split('"')
    # Each side carries the separator that joined it to the quoted field;
    # drop it. An empty side means the quoted field is the first / last
    # column, so it contributes no fields at all.
    head = before[:-1].split(',') if before else []
    tail = after[1:].split(',') if after else []
    fields = head + ['NULL'] + tail
    # A row whose width differs from the header cannot be matched to the
    # column names during cleaning, so treat it as unparseable.
    if len(fields) != column_count:
        return None
    return fields


def readShopAll(fileShopName):
    """Read the shop CSV file, clean every value and store it in ``gloV``.

    Variables read:
        gloV.shopDataType: maps a column name to the type code handed to
            ``Main.clean_data``; columns not listed use type code 6.

    Variables written:
        gloV.shopVariableNameStr: the stripped header line.
        gloV.shopVariableName: the header split on commas.
        gloV.shopDataAll: one list of cleaned values is appended per row.
        gloV.SAVE: ``[names, shopVariableNameStr, shopDataAll]`` where
            ``names`` is the string ``"shopVariableNameStr,shopDataAll,"``.

    Args:
        fileShopName: Path of the CSV file to read.

    Returns:
        0 on success, -1 when ``Main.try_encoding`` reports the file cannot
        be opened, -2 when it reports the encoding cannot be determined,
        -3 when the first two header columns are not ``经度``, ``纬度``.
    """
    # --- detect the encoding
    encoding = Main.try_encoding(fileShopName)
    if encoding == "NO_FILE.":
        print('<%s> 文件无法打开. 程序退出*********' % fileShopName)
        return -1
    if encoding == "FAILED.":
        print("<%s> 文件编码不明. 程序退出*********" % fileShopName)
        return -2

    read_failed = False
    rows = []
    # The context manager closes the file on every path, including the
    # early ``return -3`` below.
    with open(fileShopName, encoding=encoding) as shop_file:
        # --- read the column titles
        gloV.shopVariableNameStr = shop_file.readline().strip()
        gloV.shopVariableName = gloV.shopVariableNameStr.split(',')
        # Slice comparison also rejects headers with fewer than two columns
        # instead of raising IndexError.
        if gloV.shopVariableName[:2] != ['经度', '纬度']:
            print('<全国宠物店.csv> 文件前两列不是规定的 [经度, 纬度] 顺序, 程序退出******')
            return -3
        column_count = len(gloV.shopVariableName)

        # --- read the data rows (the header was line 1)
        line_number = 1
        while True:
            line_number += 1
            try:
                raw_line = shop_file.readline()
            except UnicodeError:
                # Only decoding problems are skipped; other I/O errors
                # propagate rather than being retried indefinitely.
                print('文件第 %d 行包含未知汉字, 读取失败, 已跳过.' % line_number)
                read_failed = True
                continue
            if not raw_line:
                # Empty string means end of file, not a malformed row.
                break

            stripped = raw_line.strip()
            fields = _parse_shop_line(stripped, column_count)
            if fields is None:
                read_failed = True
                print('文件第 %d 行出错: %s' % (line_number, stripped))
            else:
                rows.append(fields)

    if read_failed:
        print("****** 以上内容无法解析, 列出的数据跳过 ******\n")

    # --- clean the data
    for row in rows:
        cleaned_row = []
        # Every accepted row has exactly ``column_count`` fields, so names
        # and values pair up one to one.
        for column_name, raw_value in zip(gloV.shopVariableName, row):
            if column_name in gloV.shopDataType:
                type_code = gloV.shopDataType[column_name]
            else:
                type_code = 6  # fallback type code for undeclared columns
            cleaned_row.append(Main.clean_data(raw_value, type_code))
        gloV.shopDataAll.append(cleaned_row)

    # --- gloV.SAVE[0] holds the comma-terminated names of the saved variables
    gloV.SAVE = [
        "shopVariableNameStr," + "shopDataAll,",  # [0]
        gloV.shopVariableNameStr,                 # [1]
        gloV.shopDataAll,                         # [2]
    ]
    return 0
def _parse_community_line(line, column_count):
    """Split one stripped data line of the community CSV into its fields.

    Two line shapes are accepted:

    * a plain line holding exactly ``column_count - 1`` commas, which is
      split on every comma;
    * a line holding exactly one pair of double quotes, in which case the
      quoted field (which may itself contain commas) is replaced by the
      placeholder string ``"NULL"``.

    Args:
        line: The data line with surrounding whitespace already stripped.
        column_count: Number of columns declared by the header row.

    Returns:
        The list of field strings, or ``None`` when the line matches
        neither shape or does not yield ``column_count`` fields.
    """
    if line.count(',') == column_count - 1:
        return line.split(',')
    if line.count('"') != 2:
        return None

    before, _quoted, after = line.split('"')
    # Each side carries the separator that joined it to the quoted field;
    # drop it. An empty side means the quoted field is the first / last
    # column, so it contributes no fields at all.
    head = before[:-1].split(',') if before else []
    tail = after[1:].split(',') if after else []
    fields = head + ['NULL'] + tail
    # A row whose width differs from the header cannot be matched to the
    # column names during cleaning, so treat it as unparseable.
    if len(fields) != column_count:
        return None
    return fields


def readCommunityAll(fileCommunityName):
    """Read the community CSV file, clean every value and store it in ``gloV``.

    Variables read:
        gloV.communityDataType: maps a column name to the type code handed
            to ``Main.clean_data``; columns not listed use type code 6.

    Variables written:
        gloV.communityVariableNameStr: the stripped header line.
        gloV.communityVariableName: the header split on commas.
        gloV.communityVariableNamePosition: column name -> column index.
        gloV.communityDataAll: one list of cleaned values is appended per
            row.
        gloV.SAVE_COMMUNITY: ``[names, communityVariableNameStr,
            communityDataAll]`` where ``names`` is the string
            ``"communityVariableNameStr,communityDataAll,"``.

    Args:
        fileCommunityName: Path of the CSV file to read.

    Returns:
        0 on success, -1 when ``Main.try_encoding`` reports the file cannot
        be opened, -2 when it reports the encoding cannot be determined,
        -3 when the first six header columns are not
        ``经度, 纬度, 区县, 容积率, 均价, 现有户数`` in that order.
    """
    # --- detect the encoding
    encoding = Main.try_encoding(fileCommunityName)
    if encoding == "NO_FILE.":
        print('<%s> 文件无法打开. 程序退出*********' % fileCommunityName)
        return -1
    if encoding == "FAILED.":
        print("<%s> 文件编码不明. 程序退出*********" % fileCommunityName)
        return -2

    required_header = ["经度", "纬度", "区县", "容积率", "均价", "现有户数"]
    read_failed = False
    rows = []
    # The context manager closes the file on every path, including the
    # early ``return -3`` below.
    with open(fileCommunityName, encoding=encoding) as community_file:
        # --- read the column titles
        gloV.communityVariableNameStr = community_file.readline().strip()
        gloV.communityVariableName = gloV.communityVariableNameStr.split(',')
        # Slice comparison also rejects headers with fewer than six columns
        # instead of raising IndexError.
        if gloV.communityVariableName[:6] != required_header:
            print("<Community.csv> 前6列不是规定的 [经度,纬度,区县,容积率,均价,现有户数] 顺序,程序退出******")
            return -3
        for position, column_name in enumerate(gloV.communityVariableName):
            gloV.communityVariableNamePosition[column_name] = position
        column_count = len(gloV.communityVariableName)

        # --- read the data rows (the header was line 1)
        line_number = 1
        while True:
            line_number += 1
            try:
                raw_line = community_file.readline()
            except UnicodeError:
                # Only decoding problems are skipped; other I/O errors
                # propagate rather than being retried indefinitely.
                print('文件第 %d 行包含未知汉字, 读取失败, 已跳过.' % line_number)
                read_failed = True
                continue
            if not raw_line:
                # Empty string means end of file, not a malformed row.
                break

            stripped = raw_line.strip()
            fields = _parse_community_line(stripped, column_count)
            if fields is None:
                read_failed = True
                print('文件第 %d 行不符合CSV文件格式: %s' % (line_number, stripped))
            else:
                rows.append(fields)

    if read_failed:
        print("****** 以上内容无法解析, 列出的数据跳过 ******\n")
    print('全国社区数据已读入 <%d> 行数据.\n' % len(rows))

    # --- clean the data
    for row in rows:
        cleaned_row = []
        # Every accepted row has exactly ``column_count`` fields, so names
        # and values pair up one to one.
        for column_name, raw_value in zip(gloV.communityVariableName, row):
            if column_name in gloV.communityDataType:
                type_code = gloV.communityDataType[column_name]
            else:
                type_code = 6  # fallback type code for undeclared columns
            cleaned_row.append(Main.clean_data(raw_value, type_code))
        gloV.communityDataAll.append(cleaned_row)

    # --- gloV.SAVE_COMMUNITY[0] holds the comma-terminated names of the
    # saved variables
    gloV.SAVE_COMMUNITY = [
        "communityVariableNameStr," + "communityDataAll,",  # [0]
        gloV.communityVariableNameStr,                      # [1]
        gloV.communityDataAll,                              # [2]
    ]
    return 0