Ejemplo n.º 1
0
def readShopAll(fileShopName):
    """
    Read the pet-shop CSV file ``fileShopName`` into the ``gloV`` globals.

    The first line is the header; its first two columns must be
    ``经度`` and ``纬度``. Every following line is split on commas. A line
    whose comma count does not match the header is still accepted when it
    contains exactly one pair of double quotes (not at the very start or
    end of the line): the quoted field is replaced by the string "NULL".
    Lines that cannot be parsed into exactly as many fields as the header
    are reported on stdout and skipped.

    Variables read:
        gloV.shopDataType: maps a column name to the type code handed to
            ``Main.clean_data``; columns not listed use type code 6.

    Variables written:
        gloV.shopVariableNameStr: the raw header line.
        gloV.shopVariableName: the header split into column names.
        gloV.shopDataAll: cleaned rows are appended to this list.
        gloV.SAVE: ``[names, shopVariableNameStr, shopDataAll]`` where
            ``names`` is the comma-terminated list of saved variable names.

    Returns:
        0 on success,
        -1 if ``Main.try_encoding`` reports "NO_FILE.",
        -2 if ``Main.try_encoding`` reports "FAILED.",
        -3 if the header does not start with the required columns.
    """

    # --- Detect the file encoding
    fileShopEncoding = Main.try_encoding(fileShopName)
    if fileShopEncoding == "NO_FILE.":
        print('<%s> 文件无法打开. 程序退出*********' % fileShopName)
        return -1
    if fileShopEncoding == "FAILED.":
        print("<%s> 文件编码不明. 程序退出*********" % fileShopName)
        return -2

    readFailed = False
    shopRead = []

    # The context manager closes the file on every exit path, including
    # the early ``return -3`` below.
    with open(fileShopName, encoding=fileShopEncoding) as fileShop:
        # --- Read the column titles
        gloV.shopVariableNameStr = fileShop.readline().strip()
        gloV.shopVariableName = gloV.shopVariableNameStr.split(',')
        # Slice comparison cannot raise IndexError on a too-short header.
        if gloV.shopVariableName[:2] != ['经度', '纬度']:
            print('<全国宠物店.csv> 文件前两列不是规定的 [经度, 纬度] 顺序, 程序退出******')
            return -3
        columnCount = len(gloV.shopVariableName)

        # --- Read the data rows (the header was line 1)
        lineNumber = 1
        while True:
            try:
                rawLine = fileShop.readline()
            except UnicodeDecodeError:
                lineNumber += 1
                print('文件第 %d 行包含未知汉字, 读取失败, 已跳过.' % lineNumber)
                readFailed = True
                continue
            if not rawLine:
                # readline() returns '' only at end of file; stop here so
                # EOF is not reported as a malformed line.
                break
            lineNumber += 1

            lineText = rawLine.strip()
            fields = None
            if lineText.count(',') == columnCount - 1:
                fields = lineText.split(',')
            elif lineText.count('"') == 2:
                # Exactly one quoted field: drop the comma adjacent to each
                # quote, split the parts around it and substitute "NULL"
                # for the quoted content.
                headPart, _quoted, tailPart = lineText.split('"')
                candidate = headPart[:-1].split(',')
                candidate.append("NULL")
                candidate.extend(tailPart[1:].split(','))
                # Only keep the row if it lines up with the header;
                # otherwise the cleaning step below would mis-assign or
                # overrun the column names.
                if len(candidate) == columnCount:
                    fields = candidate

            if fields is None:
                readFailed = True
                print('文件第 %d 行出错: %s' % (lineNumber, lineText))
            else:
                shopRead.append(fields)

    if readFailed:
        print("****** 以上内容无法解析, 列出的数据跳过 ******\n")

    # --- Clean the data, column by column
    for rowFields in shopRead:
        cleanedRow = []
        for columnName, rawValue in zip(gloV.shopVariableName, rowFields):
            if columnName in gloV.shopDataType:
                typeCode = gloV.shopDataType[columnName]
            else:
                typeCode = 6
            cleanedRow.append(Main.clean_data(rawValue, typeCode))
        gloV.shopDataAll.append(cleanedRow)

    # --- gloV.SAVE[0] holds the names of the saved variables,
    #     the following entries hold their values in the same order.
    gloV.SAVE = [""]  # [0]

    gloV.SAVE[0] += "shopVariableNameStr,"
    gloV.SAVE.append(gloV.shopVariableNameStr)  # [1]

    gloV.SAVE[0] += "shopDataAll,"
    gloV.SAVE.append(gloV.shopDataAll)  # [2]

    return 0
Ejemplo n.º 2
0
def readCommunityAll(fileCommunityName):
    """
    Read the community CSV file ``fileCommunityName`` into the ``gloV`` globals.

    The first line is the header; its first six columns must be
    ``经度, 纬度, 区县, 容积率, 均价, 现有户数`` in that order. Every following
    line is split on commas. A line whose comma count does not match the
    header is still accepted when it contains exactly one pair of double
    quotes (not at the very start or end of the line): the quoted field is
    replaced by the string "NULL". Lines that cannot be parsed into exactly
    as many fields as the header are reported on stdout and skipped.

    Variables read:
        gloV.communityDataType: maps a column name to the type code handed
            to ``Main.clean_data``; columns not listed use type code 6.

    Variables written:
        gloV.communityVariableNameStr: the raw header line.
        gloV.communityVariableName: the header split into column names.
        gloV.communityVariableNamePosition: column name -> column index.
        gloV.communityDataAll: cleaned rows are appended to this list.
        gloV.SAVE_COMMUNITY: ``[names, communityVariableNameStr,
            communityDataAll]`` where ``names`` is the comma-terminated
            list of saved variable names.

    Returns:
        0 on success,
        -1 if ``Main.try_encoding`` reports "NO_FILE.",
        -2 if ``Main.try_encoding`` reports "FAILED.",
        -3 if the header does not start with the required columns.
    """

    # --- Detect the file encoding
    fileCommunityEncoding = Main.try_encoding(fileCommunityName)
    if fileCommunityEncoding == "NO_FILE.":
        print('<%s> 文件无法打开. 程序退出*********' % fileCommunityName)
        return -1
    if fileCommunityEncoding == "FAILED.":
        print("<%s> 文件编码不明. 程序退出*********" % fileCommunityName)
        return -2

    requiredColumns = ["经度", "纬度", "区县", "容积率", "均价", "现有户数"]
    readFailed = False
    communityRead = []

    # The context manager closes the file on every exit path, including
    # the early ``return -3`` below.
    with open(fileCommunityName, encoding=fileCommunityEncoding) as fileCommunity:
        # --- Read the column titles
        gloV.communityVariableNameStr = fileCommunity.readline().strip()
        gloV.communityVariableName = gloV.communityVariableNameStr.split(',')
        # Slice comparison cannot raise IndexError on a too-short header.
        if gloV.communityVariableName[:len(requiredColumns)] != requiredColumns:
            print("<Community.csv> 前6列不是规定的 [经度,纬度,区县,容积率,均价,现有户数] 顺序,程序退出******")
            return -3

        for position, columnName in enumerate(gloV.communityVariableName):
            gloV.communityVariableNamePosition[columnName] = position
        columnCount = len(gloV.communityVariableName)

        # --- Read the data rows (the header was line 1)
        lineNumber = 1
        while True:
            try:
                rawLine = fileCommunity.readline()
            except UnicodeDecodeError:
                lineNumber += 1
                print('文件第 %d 行包含未知汉字, 读取失败, 已跳过.' % lineNumber)
                readFailed = True
                continue
            if not rawLine:
                # readline() returns '' only at end of file; stop here so
                # EOF is not reported as a malformed line.
                break
            lineNumber += 1

            lineText = rawLine.strip()
            fields = None
            if lineText.count(',') == columnCount - 1:
                fields = lineText.split(',')
            elif lineText.count('"') == 2:
                # Exactly one quoted field: drop the comma adjacent to each
                # quote, split the parts around it and substitute "NULL"
                # for the quoted content.
                headPart, _quoted, tailPart = lineText.split('"')
                candidate = headPart[:-1].split(',')
                candidate.append("NULL")
                candidate.extend(tailPart[1:].split(','))
                # Only keep the row if it lines up with the header;
                # otherwise the cleaning step below would mis-assign or
                # overrun the column names.
                if len(candidate) == columnCount:
                    fields = candidate

            if fields is None:
                readFailed = True
                print('文件第 %d 行不符合CSV文件格式: %s' % (lineNumber, lineText))
            else:
                communityRead.append(fields)

    if readFailed:
        print("****** 以上内容无法解析, 列出的数据跳过 ******\n")
    print('全国社区数据已读入 <%d> 行数据.\n' % len(communityRead))

    # --- Clean the data, column by column
    for rowFields in communityRead:
        cleanedRow = []
        for columnName, rawValue in zip(gloV.communityVariableName, rowFields):
            if columnName in gloV.communityDataType:
                typeCode = gloV.communityDataType[columnName]
            else:
                typeCode = 6
            cleanedRow.append(Main.clean_data(rawValue, typeCode))
        gloV.communityDataAll.append(cleanedRow)

    # --- gloV.SAVE_COMMUNITY[0] holds the names of the saved variables,
    #     the following entries hold their values in the same order.
    gloV.SAVE_COMMUNITY = [""]  # [0]

    gloV.SAVE_COMMUNITY[0] += "communityVariableNameStr,"
    gloV.SAVE_COMMUNITY.append(gloV.communityVariableNameStr)  # [1]

    gloV.SAVE_COMMUNITY[0] += "communityDataAll,"
    gloV.SAVE_COMMUNITY.append(gloV.communityDataAll)  # [2]

    return 0